import numpy as np
import pandas as pd
from glob import glob
import torch
from torch import optim
import torchvision
import timm
from tqdm import tqdm
import seaborn as sns
from PIL import Image
import random
import os
from torchvision.transforms import v2
from torch.utils.data import Dataset , DataLoader
import cv2
import matplotlib.pyplot as plt
import albumentations as A
from albumentations import (
Compose, OneOf, Normalize, Resize, RandomResizedCrop, RandomCrop, HorizontalFlip, VerticalFlip,
RandomBrightnessContrast, Rotate, ShiftScaleRotate, Transpose
)
from albumentations.pytorch import ToTensorV2
from sklearn.model_selection import KFold , StratifiedKFold , StratifiedGroupKFold
import torch.nn as nn
from contextlib import contextmanager
from torch.optim import Adam, SGD , AdamW
from functools import partial
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau
import time
from sklearn.metrics import roc_auc_score
import math
# from catalyst.data import BalanceClassSampler
import urllib.request as urlreq
from torchvision.transforms import ToPILImage
from accelerate.tracking import GeneralTracker
from accelerate import Accelerator, notebook_launcher
from accelerate.utils import set_seed
from sklearn.metrics import log_loss
import psutil
import warnings
# Show every column when a DataFrame is displayed instead of truncating wide frames.
pd.set_option('display.max_columns' , None)
# Silence all Python warnings for the rest of the session.
warnings.filterwarnings('ignore')
def wandb_log(**kwargs):
    """Log the given metrics to Weights & Biases in a single call.

    All keyword arguments are sent as one dictionary so that metrics passed
    together are recorded on the same step. Logging them one key at a time
    would advance the step counter once per metric.

    Args:
        **kwargs: Metric name -> value pairs to log.

    Note:
        Relies on a module-level ``wandb`` name being available; nothing is
        logged when no keyword arguments are given.
    """
    if kwargs:
        wandb.log(kwargs)
def _bytes_to_gb(n_bytes):
    """Convert a byte count to gigabytes, using 1 GB = 1024 ** 3 bytes."""
    return n_bytes / (1024 ** 3)


# Snapshot of the machine's RAM usage, printed once for reference.
memory_info = psutil.virtual_memory()
print(f"Total Memory: {_bytes_to_gb(memory_info.total):.2f} GB")
print(f"Available Memory: {_bytes_to_gb(memory_info.available):.2f} GB")
print(f"Used Memory: {_bytes_to_gb(memory_info.used):.2f} GB")
print(f"Percentage Used: {memory_info.percent}%")
Total Memory: 31.36 GB Available Memory: 30.00 GB Used Memory: 0.91 GB Percentage Used: 4.3%
# Load the competition metadata tables: `train` from train-metadata.csv and
# `val` from test-metadata.csv.
train, val = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/isic-2024/isic-2024/train-metadata.csv',
        '/kaggle/input/isic-2024/isic-2024/test-metadata.csv',
    )
)
# Preview the first rows of the training metadata.
train.head()
| isic_id | target | patient_id | age_approx | sex | anatom_site_general | clin_size_long_diam_mm | image_type | tbp_tile_type | tbp_lv_A | tbp_lv_Aext | tbp_lv_B | tbp_lv_Bext | tbp_lv_C | tbp_lv_Cext | tbp_lv_H | tbp_lv_Hext | tbp_lv_L | tbp_lv_Lext | tbp_lv_areaMM2 | tbp_lv_area_perim_ratio | tbp_lv_color_std_mean | tbp_lv_deltaA | tbp_lv_deltaB | tbp_lv_deltaL | tbp_lv_deltaLB | tbp_lv_deltaLBnorm | tbp_lv_eccentricity | tbp_lv_location | tbp_lv_location_simple | tbp_lv_minorAxisMM | tbp_lv_nevi_confidence | tbp_lv_norm_border | tbp_lv_norm_color | tbp_lv_perimeterMM | tbp_lv_radial_color_std_max | tbp_lv_stdL | tbp_lv_stdLExt | tbp_lv_symm_2axis | tbp_lv_symm_2axis_angle | tbp_lv_x | tbp_lv_y | tbp_lv_z | attribution | copyright_license | lesion_id | iddx_full | iddx_1 | iddx_2 | iddx_3 | iddx_4 | iddx_5 | mel_mitotic_index | mel_thick_mm | tbp_lv_dnn_lesion_confidence | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ISIC_0015670 | 0 | IP_1235828 | 60.0 | male | lower extremity | 3.04 | TBP tile: close-up | 3D: white | 20.244422 | 16.261975 | 26.922447 | 23.954773 | 33.684638 | 28.953117 | 53.058545 | 55.828924 | 54.367448 | 62.025701 | 3.152561 | 27.476170 | 0.00000 | 3.982447 | 2.967674 | -7.658253 | 8.360566 | 5.784302 | 0.901302 | Right Leg - Upper | Right Leg | 1.543016 | 2.628592e-03 | 7.091360 | 0.000000 | 9.307003 | 0.00000 | 2.036195 | 2.637780 | 0.590476 | 85 | -182.703552 | 613.493652 | -42.427948 | Memorial Sloan Kettering Cancer Center | CC-BY | NaN | Benign | Benign | NaN | NaN | NaN | NaN | NaN | NaN | 97.517282 |
| 1 | ISIC_0015845 | 0 | IP_8170065 | 60.0 | male | head/neck | 1.10 | TBP tile: close-up | 3D: white | 31.712570 | 25.364740 | 26.331000 | 24.549290 | 41.219030 | 35.299260 | 39.702910 | 44.064040 | 48.861520 | 55.362360 | 0.919497 | 12.235290 | 0.00000 | 6.347830 | 1.781713 | -6.500838 | 6.839008 | 4.987244 | 0.639885 | Head & Neck | Head & Neck | 0.821918 | 1.334303e-07 | 2.116402 | 0.000000 | 3.354148 | 0.00000 | 0.853227 | 3.912844 | 0.285714 | 55 | -0.078308 | 1575.687000 | 57.174500 | Memorial Sloan Kettering Cancer Center | CC-BY | IL_6727506 | Benign | Benign | NaN | NaN | NaN | NaN | NaN | NaN | 3.141455 |
| 2 | ISIC_0015864 | 0 | IP_6724798 | 60.0 | male | posterior torso | 3.40 | TBP tile: close-up | 3D: XP | 22.575830 | 17.128170 | 37.970460 | 33.485410 | 44.174920 | 37.611800 | 59.265850 | 62.909730 | 53.961180 | 61.670520 | 3.265153 | 24.184620 | 0.00000 | 5.447655 | 4.485044 | -7.709336 | 9.092376 | 6.290359 | 0.932147 | Torso Back Top Third | Torso Back | 1.194905 | 2.959177e-04 | 4.798335 | 0.000000 | 8.886309 | 0.00000 | 1.743651 | 1.950777 | 0.361905 | 105 | 123.649700 | 1472.010000 | 232.908900 | Memorial Sloan Kettering Cancer Center | CC-BY | NaN | Benign | Benign | NaN | NaN | NaN | NaN | NaN | NaN | 99.804040 |
| 3 | ISIC_0015902 | 0 | IP_4111386 | 65.0 | male | anterior torso | 3.22 | TBP tile: close-up | 3D: XP | 14.242329 | 12.164757 | 21.448144 | 21.121356 | 25.746200 | 24.374023 | 56.414429 | 60.060388 | 18.649518 | 23.314841 | 6.079940 | 14.889242 | 0.51452 | 2.077572 | 0.326788 | -4.665323 | 4.783413 | 6.400196 | 0.654458 | Torso Front Top Half | Torso Front | 2.481328 | 2.198945e+01 | 1.975874 | 1.771705 | 9.514499 | 0.66469 | 1.258541 | 1.573733 | 0.209581 | 130 | -141.024780 | 1442.185791 | 58.359802 | ACEMID MIA | CC-0 | NaN | Benign | Benign | NaN | NaN | NaN | NaN | NaN | NaN | 99.989998 |
| 4 | ISIC_0024200 | 0 | IP_8313778 | 55.0 | male | anterior torso | 2.73 | TBP tile: close-up | 3D: white | 24.725520 | 20.057470 | 26.464900 | 25.710460 | 36.217980 | 32.608740 | 46.946070 | 52.041180 | 46.276310 | 54.855740 | 2.101708 | 19.902560 | 0.00000 | 4.668053 | 0.754434 | -8.579431 | 9.148495 | 6.531302 | 0.946448 | Torso Front Top Half | Torso Front | 0.929916 | 1.378832e-03 | 3.658854 | 0.000000 | 6.467562 | 0.00000 | 2.085409 | 2.480509 | 0.313433 | 20 | -72.315640 | 1488.720000 | 21.428960 | Memorial Sloan Kettering Cancer Center | CC-BY | NaN | Benign | Benign | NaN | NaN | NaN | NaN | NaN | NaN | 70.442510 |
# Flag that is not referenced anywhere else in this part of the file.
txt_to_csv = False

# Dataset root, writable output folder, and the folder holding training JPEGs.
DIR_PATH = "/kaggle/input/isic-2024/isic-2024"
OUTPUT_DIR = "/kaggle/working/"
TRAIN_DIR = DIR_PATH + "/train-image/image/"

# os.environ['CUDA_VISIBLE_DEVICES'] = "0,1"
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
# !export PYTORCH_CUDA_ALLOC_CONF=garbage_collection_threshold:0.6,max_split_size_mb:128
class CFG:
    """Static configuration for the run; read as class attributes (``CFG.lr``)."""

    # --- experiment identity / mode switches ---
    competition = "ISIC_2024"
    seed = 42
    train = True
    inference = False
    debug = False
    apex = False
    pseudo_labeling = False
    add_only_negatives = True
    weighted_loss = True
    WandB = True

    # --- data / folds ---
    target_col = 'target'
    num_classes = 2  # binary classification
    n_fold = 5
    trn_fold = [2]
    num_workers = 4

    # --- model ---
    # model_name = "eva02_large_patch14_448.mim_m38m_ft_in22k_in1k"
    model_name = "tf_efficientnetv2_m.in21k"
    size = 384

    # --- optimisation ---
    scheduler = 'CosineAnnealingWarmRestarts'
    epochs = 10
    lr = 1e-4
    min_lr = 1e-6
    T_0 = 10  # CosineAnnealingWarmRestarts
    batch_size = 20
    weight_decay = 1e-6
    max_grad_norm = 1000
    gradient_accumulation_steps = 1

    # --- logging ---
    print_freq = 18  # show the scores every this many batches
if CFG.pseudo_labeling:
    # Predictions saved by an earlier run, reused here as pseudo labels.
    ps = pd.read_csv('/kaggle/input/best-result/b4_nTTA_2epochs_aug (2).csv')
    ps.rename(columns={"label": "target"}, inplace=True)

    # Confident pseudo labeling: keep only rows scored above 0.9 or below 0.1.
    # `.copy()` makes `to_add` an independent frame so the assignment below
    # does not write into a slice of `ps`.
    to_add = ps[(ps['target'] > 0.9) | (ps['target'] < 0.1)].copy()
    # Harden the scores into 0/1 labels.
    to_add["target"] = (to_add["target"] > 0.9).astype(int)
    print(to_add["target"].value_counts())

    shape_before = train.shape
    if CFG.add_only_negatives:
        # Only extend the training set with confidently negative rows.
        to_add = to_add[to_add['target'] == 0]
    train = pd.concat([train, to_add], axis=0)
    shape_after = train.shape
    print(f"The shape of the train set have moved from {shape_before} => {shape_after}")
    # Concatenation leaves duplicate index labels; rebuild a clean 0..n-1 index.
    train.reset_index(drop=True, inplace=True)
def get_score(y_true, y_pred):
    """Return the summed per-class binary log loss for a two-class problem.

    The labels are one-hot encoded (column 0 for "not 1", column 1 for
    label 1) and each column is scored against the matching column of
    ``y_pred`` with ``log_loss``. The two losses are added, not averaged.

    Args:
        y_true: Iterable of labels; a value equal to 1 is the positive class,
            anything else is treated as class 0.
        y_pred: Array-like indexed as ``y_pred[:, class_idx]`` with one score
            column per class.

    Returns:
        The sum of the two per-class log losses.
    """
    num_classes = 2
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    is_positive = (y_true == 1).astype(int)
    one_hot = np.column_stack([1 - is_positive, is_positive])

    total_log_loss = 0.0
    for class_idx in range(num_classes):
        # `labels` is given explicitly so a column that contains only one
        # label (e.g. a split without positives) can still be scored.
        total_log_loss += log_loss(
            one_hot[:, class_idx],
            y_pred[:, class_idx],
            labels=[0, 1],
        )
    # The sum is returned as-is; divide by `num_classes` for the mean.
    return total_log_loss
# def get_score(y_true, y_pred):
# # Ensure y_true and y_pred are 1D arrays
# y_true = y_true.flatten()
# y_pred = y_pred.flatten()
# # Calculate the log loss directly
# total_log_loss = log_loss(y_true, y_pred)
@contextmanager
def timer(name):
    """Log a start line and a wall-clock duration line around a ``with`` block.

    Args:
        name: Label placed in square brackets in both log lines.

    Note:
        The "done" line is written only when the block finishes without
        raising, since nothing here catches exceptions.
    """
    started_at = time.time()
    LOGGER.info(f'[{name}] start')
    yield
    elapsed = time.time() - started_at
    LOGGER.info(f'[{name}] done in {elapsed:.0f} s.')
def init_logger(log_file=OUTPUT_DIR+'train.log'):
    """Create the module logger that writes plain messages to console and file.

    Args:
        log_file: Path of the log file; defaults to ``train.log`` inside
            ``OUTPUT_DIR``.

    Returns:
        The ``logging.Logger`` named after this module, at INFO level, with
        exactly one stream handler and one file handler attached.
    """
    from logging import getLogger, INFO, FileHandler, Formatter, StreamHandler

    logger = getLogger(__name__)
    logger.setLevel(INFO)

    # getLogger returns the same object on every call, so running this again
    # (e.g. re-executing the cell) would otherwise stack more handlers and
    # print every message several times. Remove and close the old ones first.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    console_handler = StreamHandler()
    console_handler.setFormatter(Formatter("%(message)s"))
    file_handler = FileHandler(filename=log_file)
    file_handler.setFormatter(Formatter("%(message)s"))

    logger.addHandler(console_handler)
    logger.addHandler(file_handler)
    return logger


LOGGER = init_logger()
def seed_torch(seed=42):
    """Seed the random number generators used in this file.

    Seeds Python's ``random``, NumPy, PyTorch (CPU and current CUDA device)
    and calls accelerate's ``set_seed``; also sets ``PYTHONHASHSEED`` and
    switches cuDNN to deterministic mode.

    Args:
        seed: Integer seed applied to every generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        set_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True


seed_torch(seed=CFG.seed)
if CFG.debug:
    # Debug mode: a single epoch on a fixed 3000-row sample of the training set.
    CFG.epochs = 1
    train = train.sample(n=3000, random_state=CFG.seed).reset_index(drop=True)
    # test = test.sample(n=1000, random_state=CFG.seed).reset_index(drop=True)

# Report how many rows each metadata table holds.
print(f"The train file contains {len(train)} elements")
print(f"The test file contains {len(val)} elements")
The train file contains 401059 elements The test file contains 3 elements
# Bar chart of how many training rows carry each `target` value.
sns.countplot(data = train , x = train["target"])
<Axes: xlabel='target', ylabel='count'>
# Exact row count per `target` value.
train["target"].value_counts()
target 0 400666 1 393 Name: count, dtype: int64
# image_files1 = pd.read_csv('/kaggle/input/isic-2024-balanced-image-dataset/ISIC_2024_Train_Image_IDs.csv')['Image_File'].unique()
# # Split each unique value and get the third part
# third_parts = [file.split('/')[3] for file in image_files1]
# print(set(third_parts))
# image_files2 = pd.read_csv('/kaggle/input/isic-2024-balanced-image-dataset/ISIC_2024_balanced_valb_Image_IDs.csv')['Image_File'].unique()
# # Split each unique value and get the third part
# third_parts = [file.split('/')[3] for file in image_files2]
# print(set(third_parts))
# train_ext = pd.read_csv('/kaggle/input/isic-2024-balanced-image-dataset/ISIC_2024_Train_Image_IDs.csv')
# val_ext = pd.read_csv('/kaggle/input/isic-2024-balanced-image-dataset/ISIC_2024_balanced_valb_Image_IDs.csv')
# train_ext = pd.concat([train_ext , val_ext] , axis = 0)
# stock_pos = train_ext[train_ext['Label'] == 1]
# train['Image_File'] = TRAIN_DIR + train["isic_id"] + '.jpg'
# val['Image_File'] = TRAIN_DIR + val["isic_id"] + '.jpg'
# stock_pos.rename(columns = {"Label" : CFG.target_col} , inplace =True)
# merged = pd.concat([train[train[CFG.target_col] == 0 ][stock_pos.columns] , stock_pos] , axis=0).reset_index(drop = True)
# train = merged.copy(deep = True)
# train = merged.copy(deep = True)
# Build the JPEG path of every row as TRAIN_DIR + isic_id + '.jpg'.
train['Image_File'] = TRAIN_DIR + train["isic_id"] + '.jpg'
# NOTE(review): `val` is read from test-metadata.csv, yet its paths also point
# into TRAIN_DIR — confirm this is intended.
val['Image_File'] = TRAIN_DIR + val["isic_id"] + '.jpg'
if CFG.weighted_loss:
    # Count each class by its label rather than by its position in the
    # frequency-sorted `value_counts()` output, so the result does not depend
    # on which class happens to be the majority and a missing class counts as 0.
    label_counts = train[CFG.target_col].value_counts()
    n_negative = int(label_counts.get(0, 0))
    n_positive = int(label_counts.get(1, 0))

    # Backward-compatible aliases: earlier versions bound `pos` to the first
    # (most frequent) count and `neg` to the second. With label 0 as the
    # majority class these keep the same values for any later code that
    # reads them.
    pos, neg = n_negative, n_positive

    # Each class is weighted by the *other* class's share of the data, so the
    # rarer class receives the larger weight: index 0 -> label 0, index 1 -> label 1.
    class_counts = [n_positive, n_negative]
    total_count = sum(class_counts)
    class_weights = [count / total_count for count in class_counts]
    class_weights_tensor = torch.tensor(class_weights, dtype=torch.float)
    print(class_weights_tensor)
tensor([9.7991e-04, 9.9902e-01])
# Preview the training frame now that the `Image_File` column exists.
train.head()
| isic_id | target | patient_id | age_approx | sex | anatom_site_general | clin_size_long_diam_mm | image_type | tbp_tile_type | tbp_lv_A | tbp_lv_Aext | tbp_lv_B | tbp_lv_Bext | tbp_lv_C | tbp_lv_Cext | tbp_lv_H | tbp_lv_Hext | tbp_lv_L | tbp_lv_Lext | tbp_lv_areaMM2 | tbp_lv_area_perim_ratio | tbp_lv_color_std_mean | tbp_lv_deltaA | tbp_lv_deltaB | tbp_lv_deltaL | tbp_lv_deltaLB | tbp_lv_deltaLBnorm | tbp_lv_eccentricity | tbp_lv_location | tbp_lv_location_simple | tbp_lv_minorAxisMM | tbp_lv_nevi_confidence | tbp_lv_norm_border | tbp_lv_norm_color | tbp_lv_perimeterMM | tbp_lv_radial_color_std_max | tbp_lv_stdL | tbp_lv_stdLExt | tbp_lv_symm_2axis | tbp_lv_symm_2axis_angle | tbp_lv_x | tbp_lv_y | tbp_lv_z | attribution | copyright_license | lesion_id | iddx_full | iddx_1 | iddx_2 | iddx_3 | iddx_4 | iddx_5 | mel_mitotic_index | mel_thick_mm | tbp_lv_dnn_lesion_confidence | Image_File | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ISIC_0015670 | 0 | IP_1235828 | 60.0 | male | lower extremity | 3.04 | TBP tile: close-up | 3D: white | 20.244422 | 16.261975 | 26.922447 | 23.954773 | 33.684638 | 28.953117 | 53.058545 | 55.828924 | 54.367448 | 62.025701 | 3.152561 | 27.476170 | 0.00000 | 3.982447 | 2.967674 | -7.658253 | 8.360566 | 5.784302 | 0.901302 | Right Leg - Upper | Right Leg | 1.543016 | 2.628592e-03 | 7.091360 | 0.000000 | 9.307003 | 0.00000 | 2.036195 | 2.637780 | 0.590476 | 85 | -182.703552 | 613.493652 | -42.427948 | Memorial Sloan Kettering Cancer Center | CC-BY | NaN | Benign | Benign | NaN | NaN | NaN | NaN | NaN | NaN | 97.517282 | /kaggle/input/isic-2024/isic-2024/train-image/... |
| 1 | ISIC_0015845 | 0 | IP_8170065 | 60.0 | male | head/neck | 1.10 | TBP tile: close-up | 3D: white | 31.712570 | 25.364740 | 26.331000 | 24.549290 | 41.219030 | 35.299260 | 39.702910 | 44.064040 | 48.861520 | 55.362360 | 0.919497 | 12.235290 | 0.00000 | 6.347830 | 1.781713 | -6.500838 | 6.839008 | 4.987244 | 0.639885 | Head & Neck | Head & Neck | 0.821918 | 1.334303e-07 | 2.116402 | 0.000000 | 3.354148 | 0.00000 | 0.853227 | 3.912844 | 0.285714 | 55 | -0.078308 | 1575.687000 | 57.174500 | Memorial Sloan Kettering Cancer Center | CC-BY | IL_6727506 | Benign | Benign | NaN | NaN | NaN | NaN | NaN | NaN | 3.141455 | /kaggle/input/isic-2024/isic-2024/train-image/... |
| 2 | ISIC_0015864 | 0 | IP_6724798 | 60.0 | male | posterior torso | 3.40 | TBP tile: close-up | 3D: XP | 22.575830 | 17.128170 | 37.970460 | 33.485410 | 44.174920 | 37.611800 | 59.265850 | 62.909730 | 53.961180 | 61.670520 | 3.265153 | 24.184620 | 0.00000 | 5.447655 | 4.485044 | -7.709336 | 9.092376 | 6.290359 | 0.932147 | Torso Back Top Third | Torso Back | 1.194905 | 2.959177e-04 | 4.798335 | 0.000000 | 8.886309 | 0.00000 | 1.743651 | 1.950777 | 0.361905 | 105 | 123.649700 | 1472.010000 | 232.908900 | Memorial Sloan Kettering Cancer Center | CC-BY | NaN | Benign | Benign | NaN | NaN | NaN | NaN | NaN | NaN | 99.804040 | /kaggle/input/isic-2024/isic-2024/train-image/... |
| 3 | ISIC_0015902 | 0 | IP_4111386 | 65.0 | male | anterior torso | 3.22 | TBP tile: close-up | 3D: XP | 14.242329 | 12.164757 | 21.448144 | 21.121356 | 25.746200 | 24.374023 | 56.414429 | 60.060388 | 18.649518 | 23.314841 | 6.079940 | 14.889242 | 0.51452 | 2.077572 | 0.326788 | -4.665323 | 4.783413 | 6.400196 | 0.654458 | Torso Front Top Half | Torso Front | 2.481328 | 2.198945e+01 | 1.975874 | 1.771705 | 9.514499 | 0.66469 | 1.258541 | 1.573733 | 0.209581 | 130 | -141.024780 | 1442.185791 | 58.359802 | ACEMID MIA | CC-0 | NaN | Benign | Benign | NaN | NaN | NaN | NaN | NaN | NaN | 99.989998 | /kaggle/input/isic-2024/isic-2024/train-image/... |
| 4 | ISIC_0024200 | 0 | IP_8313778 | 55.0 | male | anterior torso | 2.73 | TBP tile: close-up | 3D: white | 24.725520 | 20.057470 | 26.464900 | 25.710460 | 36.217980 | 32.608740 | 46.946070 | 52.041180 | 46.276310 | 54.855740 | 2.101708 | 19.902560 | 0.00000 | 4.668053 | 0.754434 | -8.579431 | 9.148495 | 6.531302 | 0.946448 | Torso Front Top Half | Torso Front | 0.929916 | 1.378832e-03 | 3.658854 | 0.000000 | 6.467562 | 0.00000 | 2.085409 | 2.480509 | 0.313433 | 20 | -72.315640 | 1488.720000 | 21.428960 | Memorial Sloan Kettering Cancer Center | CC-BY | NaN | Benign | Benign | NaN | NaN | NaN | NaN | NaN | NaN | 70.442510 | /kaggle/input/isic-2024/isic-2024/train-image/... |
def feature_engineering(df):
    """Add hand-crafted numeric lesion features to ``df``.

    Every feature is derived from columns of ``df`` itself, so the function
    works the same way for the training and the test frame. Columns are added
    in place; callers that need the input untouched should pass a copy.
    Ratios and logarithms are not guarded here: a zero denominator or a
    non-positive logarithm argument yields ``inf``/``NaN`` in the result.

    Args:
        df: DataFrame holding the raw ``tbp_lv_*``, ``clin_size_long_diam_mm``
            and ``age_approx`` columns used below.

    Returns:
        A ``(df, new_num_cols)`` tuple: the same DataFrame object with the new
        columns, and the list of the added column names.
    """
    # --- first feature set ---
    df["lesion_size_ratio"] = df["tbp_lv_minorAxisMM"] / df["clin_size_long_diam_mm"]
    df["lesion_shape_index"] = df["tbp_lv_areaMM2"] / (df["tbp_lv_perimeterMM"] ** 2)
    df["hue_contrast"] = (df["tbp_lv_H"] - df["tbp_lv_Hext"]).abs()
    df["luminance_contrast"] = (df["tbp_lv_L"] - df["tbp_lv_Lext"]).abs()
    df["lesion_color_difference"] = np.sqrt(
        df["tbp_lv_deltaA"] ** 2 + df["tbp_lv_deltaB"] ** 2 + df["tbp_lv_deltaL"] ** 2
    )
    df["border_complexity"] = df["tbp_lv_norm_border"] + df["tbp_lv_symm_2axis"]
    df["color_uniformity"] = df["tbp_lv_color_std_mean"] / df["tbp_lv_radial_color_std_max"]
    df["3d_position_distance"] = np.sqrt(
        df["tbp_lv_x"] ** 2 + df["tbp_lv_y"] ** 2 + df["tbp_lv_z"] ** 2
    )
    df["perimeter_to_area_ratio"] = df["tbp_lv_perimeterMM"] / df["tbp_lv_areaMM2"]
    df["lesion_visibility_score"] = df["tbp_lv_deltaLBnorm"] + df["tbp_lv_norm_color"]
    # df["combined_anatomical_site"] = df["anatom_site_general"] + "_" + df["tbp_lv_location"]
    df["symmetry_border_consistency"] = df["tbp_lv_symm_2axis"] * df["tbp_lv_norm_border"]
    df["color_consistency"] = df["tbp_lv_stdL"] / df["tbp_lv_Lext"]
    df["size_age_interaction"] = df["clin_size_long_diam_mm"] * df["age_approx"]
    df["hue_color_std_interaction"] = df["tbp_lv_H"] * df["tbp_lv_color_std_mean"]
    df["lesion_severity_index"] = (
        df["tbp_lv_norm_border"] + df["tbp_lv_norm_color"] + df["tbp_lv_eccentricity"]
    ) / 3
    df["shape_complexity_index"] = df["border_complexity"] + df["lesion_shape_index"]
    df["color_contrast_index"] = (
        df["tbp_lv_deltaA"] + df["tbp_lv_deltaB"] + df["tbp_lv_deltaL"] + df["tbp_lv_deltaLBnorm"]
    )
    df["log_lesion_area"] = np.log(df["tbp_lv_areaMM2"] + 1)
    df["normalized_lesion_size"] = df["clin_size_long_diam_mm"] / df["age_approx"]
    df["mean_hue_difference"] = (df["tbp_lv_H"] + df["tbp_lv_Hext"]) / 2
    df["std_dev_contrast"] = np.sqrt(
        (df["tbp_lv_deltaA"] ** 2 + df["tbp_lv_deltaB"] ** 2 + df["tbp_lv_deltaL"] ** 2) / 3
    )
    df["color_shape_composite_index"] = (
        df["tbp_lv_color_std_mean"] + df["tbp_lv_area_perim_ratio"] + df["tbp_lv_symm_2axis"]
    ) / 3
    # Computed from `df` itself; reading another frame here would give wrong
    # angles for any frame other than that one.
    df["3d_lesion_orientation"] = np.arctan2(df["tbp_lv_y"], df["tbp_lv_x"])
    df["overall_color_difference"] = (
        df["tbp_lv_deltaA"] + df["tbp_lv_deltaB"] + df["tbp_lv_deltaL"]
    ) / 3
    df["symmetry_perimeter_interaction"] = df["tbp_lv_symm_2axis"] * df["tbp_lv_perimeterMM"]
    df["comprehensive_lesion_index"] = (
        df["tbp_lv_area_perim_ratio"]
        + df["tbp_lv_eccentricity"]
        + df["tbp_lv_norm_color"]
        + df["tbp_lv_symm_2axis"]
    ) / 4

    # --- second feature set ---
    # 1. Composite shape measure
    df['shape_complexity_ratio'] = df['tbp_lv_norm_border'] / df['lesion_shape_index']
    # 2. Colour variability
    df['color_variability'] = df['tbp_lv_color_std_mean'] / df['tbp_lv_stdL']
    # 3. Border asymmetry
    df['border_asymmetry'] = df['tbp_lv_norm_border'] * (1 - df['tbp_lv_symm_2axis'])
    # 4. Relation between 3D position and size
    df['3d_size_ratio'] = df['3d_position_distance'] / df['clin_size_long_diam_mm']
    # 5. Interaction between age and lesion characteristics
    df['age_lesion_interaction'] = df['age_approx'] * df['lesion_severity_index']
    # 6. Composite colour-contrast measure
    df['color_contrast_complexity'] = df['color_contrast_index'] * df['tbp_lv_radial_color_std_max']
    # 7. Composite of shape and colour
    df['shape_color_composite'] = df['shape_complexity_index'] * df['color_uniformity']
    # 8. Relative lesion size
    df['relative_lesion_size'] = df['clin_size_long_diam_mm'] / df['tbp_lv_minorAxisMM']
    # 9. Interaction between border complexity and colour variability
    df['border_color_interaction'] = df['border_complexity'] * df['color_variability']
    # 10. 3D position in spherical coordinates
    df['3d_radial_distance'] = np.sqrt(
        df['tbp_lv_x'] ** 2 + df['tbp_lv_y'] ** 2 + df['tbp_lv_z'] ** 2
    )
    df['3d_polar_angle'] = np.arccos(df['tbp_lv_z'] / df['3d_radial_distance'])
    df['3d_azimuthal_angle'] = np.arctan2(df['tbp_lv_y'], df['tbp_lv_x'])
    # 11. Ratio of shape complexity to size
    df['shape_size_ratio'] = df['shape_complexity_index'] / df['clin_size_long_diam_mm']
    # 12. Composite of colour non-uniformity and border complexity
    df['color_border_complexity'] = df['color_uniformity'] * df['border_complexity']
    # 13. Interaction between lesion visibility and size
    df['visibility_size_interaction'] = (
        df['lesion_visibility_score'] * np.log(df['clin_size_long_diam_mm'])
    )
    # 14. Age-adjusted lesion index
    df['age_adjusted_lesion_index'] = df['comprehensive_lesion_index'] / np.log(df['age_approx'])
    # 15. Non-linear transform of the colour contrast
    df['nonlinear_color_contrast'] = np.tanh(df['color_contrast_index'])
    # 16. Composite of shape and position
    df['shape_location_index'] = df['lesion_shape_index'] * df['3d_position_distance']
    # 17. Ratio of border complexity to symmetry (small epsilon avoids division by zero)
    df['border_complexity_asymmetry_ratio'] = (
        df['border_complexity'] / (df['tbp_lv_symm_2axis'] + 1e-5)
    )
    # 18. Interaction between colour variability and lesion size
    df['color_variability_size_interaction'] = (
        df['color_variability'] * np.log(df['tbp_lv_areaMM2'])
    )
    # 19. Composite of 3D position and lesion characteristics
    df['3d_lesion_composite'] = df['3d_position_distance'] * df['comprehensive_lesion_index']
    # 20. Non-linear composite of shape and colour
    df['nonlinear_shape_color_composite'] = np.tanh(df['shape_color_composite'])

    new_num_cols = [
        "lesion_size_ratio", "lesion_shape_index", "hue_contrast",
        "luminance_contrast", "lesion_color_difference", "border_complexity",
        "color_uniformity", "3d_position_distance", "perimeter_to_area_ratio",
        "lesion_visibility_score", "symmetry_border_consistency", "color_consistency",
        "size_age_interaction", "hue_color_std_interaction", "lesion_severity_index",
        "shape_complexity_index", "color_contrast_index", "log_lesion_area",
        "normalized_lesion_size", "mean_hue_difference", "std_dev_contrast",
        "color_shape_composite_index", "3d_lesion_orientation", "overall_color_difference",
        "symmetry_perimeter_interaction", "comprehensive_lesion_index", "shape_complexity_ratio",
        "color_variability", "border_asymmetry", "3d_size_ratio", "age_lesion_interaction",
        "color_contrast_complexity", "shape_color_composite", "relative_lesion_size",
        "border_color_interaction", "3d_radial_distance", "3d_polar_angle", "3d_azimuthal_angle",
        "shape_size_ratio", "color_border_complexity", "visibility_size_interaction",
        "age_adjusted_lesion_index", "nonlinear_color_contrast", "shape_location_index",
        "border_complexity_asymmetry_ratio", "color_variability_size_interaction",
        "3d_lesion_composite", "nonlinear_shape_color_composite",
    ]
    # new_cat_cols = ["combined_anatomical_site"]
    return df, new_num_cols
# Numeric model inputs: raw metadata columns first, then the engineered
# features that are kept (a subset of what `feature_engineering` creates).
numerical_columns = [
    # raw metadata
    'age_approx',
    'clin_size_long_diam_mm',
    'tbp_lv_A',
    'tbp_lv_Aext',
    'tbp_lv_B',
    'tbp_lv_Bext',
    'tbp_lv_C',
    'tbp_lv_Cext',
    'tbp_lv_H',
    'tbp_lv_Hext',
    'tbp_lv_L',
    'tbp_lv_Lext',
    'tbp_lv_areaMM2',
    'tbp_lv_area_perim_ratio',
    'tbp_lv_color_std_mean',
    'tbp_lv_deltaA',
    'tbp_lv_deltaB',
    'tbp_lv_deltaL',
    'tbp_lv_deltaLBnorm',
    'tbp_lv_eccentricity',
    'tbp_lv_minorAxisMM',
    'tbp_lv_nevi_confidence',
    'tbp_lv_norm_border',
    'tbp_lv_norm_color',
    'tbp_lv_perimeterMM',
    'tbp_lv_radial_color_std_max',
    'tbp_lv_stdL',
    'tbp_lv_stdLExt',
    'tbp_lv_symm_2axis',
    'tbp_lv_symm_2axis_angle',
    'tbp_lv_x',
    'tbp_lv_y',
    'tbp_lv_z',
    # engineered features
    'lesion_size_ratio',
    'lesion_shape_index',
    'hue_contrast',
    'lesion_color_difference',
    'color_uniformity',
    'perimeter_to_area_ratio',
    'lesion_visibility_score',
    'symmetry_border_consistency',
    'color_consistency',
    'size_age_interaction',
    'lesion_severity_index',
    'color_contrast_index',
    'log_lesion_area',
    'normalized_lesion_size',
    'mean_hue_difference',
    '3d_lesion_orientation',
    'overall_color_difference',
    'symmetry_perimeter_interaction',
    'comprehensive_lesion_index',
    'shape_complexity_ratio',
    'color_variability',
    'border_asymmetry',
    '3d_size_ratio',
    'age_lesion_interaction',
    'color_contrast_complexity',
    'shape_color_composite',
    'relative_lesion_size',
    'border_color_interaction',
    '3d_polar_angle',
    'shape_size_ratio',
    'visibility_size_interaction',
    'age_adjusted_lesion_index',
    'nonlinear_color_contrast',
    'shape_location_index',
    'border_complexity_asymmetry_ratio',
    'color_variability_size_interaction',
    '3d_lesion_composite',
    'nonlinear_shape_color_composite',
]
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
# Work on an independent copy of the test metadata.
test = val.copy(deep = True)

# Add the engineered numeric features to both frames.
train, _ = feature_engineering(train.copy())
test, _ = feature_engineering(test.copy())

cat_features = ["anatom_site_general", "tbp_lv_location_simple", "tbp_lv_location"]
binary_column = "sex"
# train_num_df,to_drop = remove_collinear_features(train[numerical_columns].copy(),threshold=0.98)
# train = train.drop(columns=to_drop)
# numerical_columns = [col for col in numerical_columns if col not in to_drop]

# Ratios and logs can produce +/-inf; treat those as missing values.
train.replace([np.inf, -np.inf], np.nan, inplace=True)
test.replace([np.inf, -np.inf], np.nan, inplace=True)

# Impute missing numeric values with the TRAINING medians for both frames.
# This keeps the test imputation consistent with the train-fitted scaler
# below, and still fills a test column that is entirely missing.
train_medians = train[numerical_columns].median()
train[numerical_columns] = train[numerical_columns].fillna(train_medians)
test[numerical_columns] = test[numerical_columns].fillna(train_medians)

# Number of images per patient, log1p-transformed.
train['n_images'] = train.patient_id.map(train.groupby(['patient_id']).isic_id.count())
test['n_images'] = test.patient_id.map(test.groupby(['patient_id']).isic_id.count())
train.loc[train['patient_id'] == -1, 'n_images'] = 1
train['n_images'] = np.log1p(train['n_images'].values)
test['n_images'] = np.log1p(test['n_images'].values)
numerical_columns += ["n_images"]

# Scale every numeric column to the range seen in training (fit on train only).
scaler = MinMaxScaler()
train[numerical_columns] = scaler.fit_transform(train[numerical_columns])
test[numerical_columns] = scaler.transform(test[numerical_columns])

# Fill missing categorical / sex values with the most frequent training value.
simple_imputer = SimpleImputer(strategy='most_frequent')
train[cat_features+[binary_column]] = simple_imputer.fit_transform(train[cat_features+[binary_column]])
test[cat_features+[binary_column]] = simple_imputer.transform(test[cat_features+[binary_column]])

# Encode sex as 0 (male) / 1 (female); any other value becomes NaN.
train[binary_column] = train[binary_column].map({'male': 0, 'female': 1})
test[binary_column] = test[binary_column].map({'male': 0, 'female': 1})

# One-hot encode the categorical columns; categories unseen in training are
# encoded as all zeros.
onehot_encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
train_encoded_df = pd.DataFrame(onehot_encoder.fit_transform(train[cat_features]))
test_encoded_df = pd.DataFrame(onehot_encoder.transform(test[cat_features]))
train_encoded_df.columns = onehot_encoder.get_feature_names_out(cat_features)
test_encoded_df.columns = onehot_encoder.get_feature_names_out(cat_features)

# Replace the raw categorical columns with their one-hot columns. The index is
# reset first so the column-wise concat lines up row by row.
train = train.drop(columns=cat_features).reset_index(drop=True)
train = pd.concat([train, train_encoded_df], axis=1)
test = test.drop(columns=cat_features).reset_index(drop=True)
test = pd.concat([test, test_encoded_df], axis=1)

# From here on `cat_features` names the one-hot columns plus "sex".
cat_features = list(onehot_encoder.get_feature_names_out(cat_features))
cat_features += ["sex"]

# Full list of tabular inputs passed to the datasets.
meta_features = numerical_columns + cat_features
train['Image_File']
0 /kaggle/input/isic-2024/isic-2024/train-image/...
1 /kaggle/input/isic-2024/isic-2024/train-image/...
2 /kaggle/input/isic-2024/isic-2024/train-image/...
3 /kaggle/input/isic-2024/isic-2024/train-image/...
4 /kaggle/input/isic-2024/isic-2024/train-image/...
...
401054 /kaggle/input/isic-2024/isic-2024/train-image/...
401055 /kaggle/input/isic-2024/isic-2024/train-image/...
401056 /kaggle/input/isic-2024/isic-2024/train-image/...
401057 /kaggle/input/isic-2024/isic-2024/train-image/...
401058 /kaggle/input/isic-2024/isic-2024/train-image/...
Name: Image_File, Length: 401059, dtype: object
class TrainDataset(Dataset):
    """Training dataset that loads JPEG images from disk, optionally with metadata.

    Args:
        df: DataFrame with at least the ``Image_File`` and ``target`` columns,
            plus every column named in ``meta_features``.
        hdf5: Accepted for signature compatibility; not used by this class.
        meta_features: List of metadata column names, or ``None`` to return
            images without metadata.
        transform: Optional callable invoked as ``transform(image=image)`` and
            expected to return a mapping with an ``'image'`` entry.
    """

    def __init__(self, df, hdf5, meta_features, transform=None):
        if df is not None:
            # Per-class views of the data, kept as attributes.
            self.patient_0 = df.query("target == 0").reset_index(drop=True)
            self.patient_1 = df.query("target == 1").reset_index(drop=True)
        self.df = df
        self.transform = transform
        self.file_names = df["Image_File"].values
        self.labels = df["target"].values
        self.to_img = ToPILImage()
        self.use_meta = meta_features is not None
        self.meta_features = meta_features

    def __len__(self):
        """Return the number of rows in the underlying DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            ``(image, label, file_path, data)`` when metadata is enabled, where
            ``data`` is ``(image_as_float_tensor, metadata_float_tensor)``;
            ``(image, label, file_path)`` when ``meta_features`` is ``None``.

        Raises:
            OSError: If the image file cannot be read.
        """
        file_path = self.file_names[idx]
        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f"Could not read image file: {file_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # image = augment_face(self.detector, self.predictor, file_path, random_num)
        if self.transform:
            image = self.transform(image=image)['image']
        label = torch.tensor(self.labels[idx]).long()

        if not self.use_meta:
            # No metadata requested, so there is no `data` tuple to build.
            return image, label, file_path

        row = self.df.iloc[idx]
        meta_data = row[self.meta_features].to_numpy().astype(np.float32)
        data = (torch.tensor(image).float(), torch.tensor(meta_data).float())
        return image, label, file_path, data

    def get_labels(self):
        """Return all labels as a list (one entry per row)."""
        return list(self.labels)
class TestDataset(Dataset):
    """Inference dataset that decodes images stored as encoded bytes in an HDF5-like mapping.

    Args:
        df: DataFrame with an ``Image_File`` column plus every column named in
            ``meta_features``.
        hdf5: Mapping that supports ``keys()`` and ``hdf5[key][()]``, the
            latter returning the encoded image bytes.
        meta_features: List of metadata column names, or ``None`` to return
            images only.
        transform: Optional callable invoked as ``transform(image=image)`` and
            expected to return a mapping with an ``'image'`` entry.
    """

    def __init__(self, df, hdf5, meta_features, transform=None):
        self.df = df
        self.hdf5 = hdf5
        self.use_meta = meta_features is not None
        self.patient_ids = list(self.hdf5.keys())
        self.transform = transform
        self.file_names = df["Image_File"].values
        self.meta_features = meta_features

    def __len__(self):
        """Return the number of rows in the underlying DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Decode one image and, when enabled, attach its metadata.

        Returns:
            ``(image, data)`` when metadata is enabled, where ``data`` is
            ``(image_as_float_tensor, metadata_float_tensor)``; otherwise just
            ``image``.

        Raises:
            ValueError: If the stored bytes cannot be decoded as an image.
        """
        # NOTE(review): the image is looked up by position in the key list,
        # which assumes the mapping's key order matches the row order of
        # `df` — confirm against the caller.
        key = self.patient_ids[idx]
        image_data = self.hdf5[key][()]
        image = cv2.imdecode(np.frombuffer(image_data, np.uint8), cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imdecode signals failure by returning None instead of raising.
            raise ValueError(f"Could not decode image stored under key: {key}")
        # image = cv2.imread(file_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image=image)['image']

        if self.use_meta:
            row = self.df.iloc[idx]
            meta_data = row[self.meta_features].to_numpy().astype(np.float32)
            data = (torch.tensor(image).float(), torch.tensor(meta_data).float())
            return image, data
        return image
# Preview the training frame after scaling and one-hot encoding.
train.head()
| isic_id | target | patient_id | age_approx | sex | clin_size_long_diam_mm | image_type | tbp_tile_type | tbp_lv_A | tbp_lv_Aext | tbp_lv_B | tbp_lv_Bext | tbp_lv_C | tbp_lv_Cext | tbp_lv_H | tbp_lv_Hext | tbp_lv_L | tbp_lv_Lext | tbp_lv_areaMM2 | tbp_lv_area_perim_ratio | tbp_lv_color_std_mean | tbp_lv_deltaA | tbp_lv_deltaB | tbp_lv_deltaL | tbp_lv_deltaLB | tbp_lv_deltaLBnorm | tbp_lv_eccentricity | tbp_lv_minorAxisMM | tbp_lv_nevi_confidence | tbp_lv_norm_border | tbp_lv_norm_color | tbp_lv_perimeterMM | tbp_lv_radial_color_std_max | tbp_lv_stdL | tbp_lv_stdLExt | tbp_lv_symm_2axis | tbp_lv_symm_2axis_angle | tbp_lv_x | tbp_lv_y | tbp_lv_z | attribution | copyright_license | lesion_id | iddx_full | iddx_1 | iddx_2 | iddx_3 | iddx_4 | iddx_5 | mel_mitotic_index | mel_thick_mm | tbp_lv_dnn_lesion_confidence | Image_File | lesion_size_ratio | lesion_shape_index | hue_contrast | luminance_contrast | lesion_color_difference | border_complexity | color_uniformity | 3d_position_distance | perimeter_to_area_ratio | lesion_visibility_score | symmetry_border_consistency | color_consistency | size_age_interaction | hue_color_std_interaction | lesion_severity_index | shape_complexity_index | color_contrast_index | log_lesion_area | normalized_lesion_size | mean_hue_difference | std_dev_contrast | color_shape_composite_index | 3d_lesion_orientation | overall_color_difference | symmetry_perimeter_interaction | comprehensive_lesion_index | shape_complexity_ratio | color_variability | border_asymmetry | 3d_size_ratio | age_lesion_interaction | color_contrast_complexity | shape_color_composite | relative_lesion_size | border_color_interaction | 3d_radial_distance | 3d_polar_angle | 3d_azimuthal_angle | shape_size_ratio | color_border_complexity | visibility_size_interaction | age_adjusted_lesion_index | nonlinear_color_contrast | shape_location_index | border_complexity_asymmetry_ratio | color_variability_size_interaction | 3d_lesion_composite | nonlinear_shape_color_composite 
| n_images | anatom_site_general_anterior torso | anatom_site_general_head/neck | anatom_site_general_lower extremity | anatom_site_general_posterior torso | anatom_site_general_upper extremity | tbp_lv_location_simple_Head & Neck | tbp_lv_location_simple_Left Arm | tbp_lv_location_simple_Left Leg | tbp_lv_location_simple_Right Arm | tbp_lv_location_simple_Right Leg | tbp_lv_location_simple_Torso Back | tbp_lv_location_simple_Torso Front | tbp_lv_location_simple_Unknown | tbp_lv_location_Head & Neck | tbp_lv_location_Left Arm | tbp_lv_location_Left Arm - Lower | tbp_lv_location_Left Arm - Upper | tbp_lv_location_Left Leg | tbp_lv_location_Left Leg - Lower | tbp_lv_location_Left Leg - Upper | tbp_lv_location_Right Arm | tbp_lv_location_Right Arm - Lower | tbp_lv_location_Right Arm - Upper | tbp_lv_location_Right Leg | tbp_lv_location_Right Leg - Lower | tbp_lv_location_Right Leg - Upper | tbp_lv_location_Torso Back | tbp_lv_location_Torso Back Bottom Third | tbp_lv_location_Torso Back Middle Third | tbp_lv_location_Torso Back Top Third | tbp_lv_location_Torso Front | tbp_lv_location_Torso Front Bottom Half | tbp_lv_location_Torso Front Top Half | tbp_lv_location_Unknown | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ISIC_0015670 | 0 | IP_1235828 | 0.6875 | 0 | 0.074453 | TBP tile: close-up | 3D: white | 0.448560 | 0.549700 | 0.502444 | 0.376069 | 0.549810 | 0.402899 | 0.508448 | 0.267121 | 0.601787 | 0.547974 | 0.008153 | 0.218652 | 0.000000 | 0.387365 | 0.685141 | 0.836741 | 8.360566 | 0.101256 | 0.922243 | 0.070059 | 2.628592e-05 | 0.690918 | 0.00000 | 0.067335 | 0.000000 | 0.102225 | 0.080388 | 0.582086 | 0.485714 | 0.356776 | 0.566559 | 0.408087 | Memorial Sloan Kettering Cancer Center | CC-BY | NaN | Benign | Benign | NaN | NaN | NaN | NaN | NaN | NaN | 97.517282 | /kaggle/input/isic-2024/isic-2024/train-image/... | 0.377312 | 0.306032 | 0.057723 | 7.658253 | 0.153458 | 7.681836 | 0.001929 | 641.525666 | 0.464388 | 0.074536 | 0.426332 | 0.093746 | 0.079887 | 0.000000 | 0.334336 | 7.718231 | 0.527696 | 0.195193 | 0.024389 | 0.404682 | 5.269909 | 9.355549 | 0.796815 | 0.723533 | 0.105880 | 0.215550 | 0.216541 | 0.000000 | 0.540615 | 0.124366 | 0.260433 | 0.499957 | 0.001206 | 0.245694 | 0.000000 | 641.525666 | 0.519977 | 1.860241 | 0.403224 | NaN | 0.062648 | 0.225713 | 0.999961 | 0.143238 | 0.164883 | 0.00000 | 0.114770 | 0.996913 | 0.657389 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 1 | ISIC_0015845 | 0 | IP_8170065 | 0.6875 | 0 | 0.003650 | TBP tile: close-up | 3D: white | 0.674860 | 0.747149 | 0.491697 | 0.391260 | 0.685050 | 0.552365 | 0.384152 | 0.152394 | 0.535710 | 0.465595 | 0.001462 | 0.019278 | 0.000000 | 0.438834 | 0.646551 | 0.867883 | 6.839008 | 0.072258 | 0.646281 | 0.030250 | 1.334303e-09 | 0.162262 | 0.00000 | 0.007756 | 0.000000 | 0.033828 | 0.131598 | 0.252621 | 0.314286 | 0.504132 | 0.893847 | 0.571023 | Memorial Sloan Kettering Cancer Center | CC-BY | IL_6727506 | Benign | Benign | NaN | NaN | NaN | NaN | NaN | NaN | 3.141455 | /kaggle/input/isic-2024/isic-2024/train-image/... | 0.671535 | 0.862602 | 0.090871 | 6.500840 | 0.156607 | 2.402116 | 0.001929 | 1576.723962 | 0.585990 | 0.053190 | 0.058229 | 0.035666 | 0.019856 | 0.000000 | 0.061852 | 2.483847 | 0.555390 | 0.053754 | 0.003870 | 0.265244 | 5.345725 | 4.173668 | 0.750701 | 0.756171 | 0.009881 | 0.018873 | 0.021085 | 0.000000 | 0.259181 | 0.875064 | 0.072891 | 0.499957 | 0.001206 | 0.088039 | 0.000000 | 1576.723962 | 0.486381 | 1.570846 | 0.357300 | NaN | 0.004630 | 0.025995 | 0.999998 | 0.882084 | 0.004149 | 0.00000 | 0.129278 | 0.996913 | 0.479479 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2 | ISIC_0015864 | 0 | IP_6724798 | 0.6875 | 0 | 0.087591 | TBP tile: close-up | 3D: XP | 0.494565 | 0.568489 | 0.703178 | 0.619598 | 0.738108 | 0.606831 | 0.566217 | 0.336171 | 0.596911 | 0.543583 | 0.008491 | 0.175593 | 0.000000 | 0.419247 | 0.734515 | 0.835366 | 9.092376 | 0.119668 | 0.954805 | 0.050841 | 2.959177e-06 | 0.447253 | 0.00000 | 0.063125 | 0.000000 | 0.085311 | 0.052795 | 0.334987 | 0.600000 | 0.603966 | 0.858581 | 0.858501 | Memorial Sloan Kettering Cancer Center | CC-BY | NaN | Benign | Benign | NaN | NaN | NaN | NaN | NaN | NaN | 99.804040 | /kaggle/input/isic-2024/isic-2024/train-image/... | 0.185612 | 0.366843 | 0.075925 | 7.709340 | 0.185190 | 5.160240 | 0.001929 | 1495.442825 | 0.424068 | 0.088089 | 0.174525 | 0.078557 | 0.091026 | 0.000000 | 0.216620 | 5.201588 | 0.589523 | 0.200096 | 0.028197 | 0.478441 | 6.033971 | 8.182175 | 0.737339 | 0.764476 | 0.057649 | 0.172135 | 0.125381 | 0.000000 | 0.572494 | 0.264886 | 0.179413 | 0.499957 | 0.001206 | 0.464085 | 0.000000 | 1495.442825 | 0.446995 | 1.486992 | 0.238232 | NaN | 0.074987 | 0.181627 | 1.000000 | 0.412719 | 0.208498 | 0.00000 | 0.245416 | 0.996913 | 0.810607 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 3 | ISIC_0015902 | 0 | IP_4111386 | 0.7500 | 0 | 0.081022 | TBP tile: close-up | 3D: XP | 0.330121 | 0.460827 | 0.402979 | 0.303669 | 0.407316 | 0.295051 | 0.539680 | 0.308385 | 0.173134 | 0.069389 | 0.016925 | 0.053995 | 0.051695 | 0.345916 | 0.599208 | 0.917270 | 4.783413 | 0.123664 | 0.661665 | 0.121860 | 2.198945e-01 | 0.147329 | 0.17717 | 0.069412 | 0.057844 | 0.057262 | 0.037652 | 0.170317 | 0.742857 | 0.390405 | 0.848437 | 0.572962 | ACEMID MIA | CC-0 | NaN | Benign | Benign | NaN | NaN | NaN | NaN | NaN | NaN | 99.989998 | /kaggle/input/isic-2024/isic-2024/train-image/... | 0.700267 | 0.683752 | 0.075968 | 4.665323 | 0.057300 | 2.185455 | 0.001449 | 1450.239191 | 0.221867 | 0.138479 | 0.038648 | 0.164283 | 0.093760 | 29.026369 | 0.147492 | 2.252618 | 0.510845 | 0.292986 | 0.023673 | 0.446797 | 2.954563 | 5.204448 | 0.766225 | 0.701845 | 0.031795 | 0.073172 | 0.025163 | 0.148822 | 0.269298 | 0.271365 | 0.144972 | 0.510397 | 0.000654 | 0.077898 | 0.040456 | 1450.239191 | 0.485075 | 1.668272 | 0.102462 | 1.691708 | 0.093087 | 0.076888 | 0.999746 | 0.661763 | 0.074702 | 0.09984 | 0.160499 | 0.940654 | 0.664374 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 |
| 4 | ISIC_0024200 | 0 | IP_8313778 | 0.6250 | 0 | 0.063139 | TBP tile: close-up | 3D: white | 0.536985 | 0.632028 | 0.494130 | 0.420931 | 0.595283 | 0.488997 | 0.451561 | 0.230185 | 0.504685 | 0.459331 | 0.005005 | 0.119577 | 0.000000 | 0.402283 | 0.613124 | 0.811955 | 9.148495 | 0.128433 | 0.969901 | 0.036212 | 1.378832e-05 | 0.326168 | 0.00000 | 0.038916 | 0.000000 | 0.105071 | 0.074071 | 0.282586 | 0.114286 | 0.445845 | 0.864265 | 0.512548 | Memorial Sloan Kettering Cancer Center | CC-BY | NaN | Benign | Benign | NaN | NaN | NaN | NaN | NaN | NaN | 70.442510 | /kaggle/input/isic-2024/isic-2024/train-image/... | 0.172334 | 0.476059 | 0.106167 | 8.579430 | 0.169487 | 3.972287 | 0.001929 | 1490.629394 | 0.486255 | 0.094542 | 0.113931 | 0.111047 | 0.063254 | 0.000000 | 0.158069 | 4.022532 | 0.497088 | 0.141714 | 0.023735 | 0.349728 | 5.655868 | 6.738664 | 0.758427 | 0.689332 | 0.032495 | 0.118431 | 0.075373 | 0.000000 | 0.461375 | 0.330097 | 0.125369 | 0.499957 | 0.001206 | 0.486625 | 0.000000 | 1490.629394 | 0.493560 | 1.619334 | 0.229006 | NaN | 0.063896 | 0.132898 | 0.998829 | 0.504179 | 0.153139 | 0.00000 | 0.201530 | 0.996913 | 0.639917 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 |
import h5py
# Open the training image archive read-only and hand it to the dataset.
hdf = h5py.File('/kaggle/input/isic-2024/isic-2024/train-image.hdf5', mode='r')
train_dataset = TrainDataset(train, hdf, meta_features)

# Preview the first samples of the training set (no transform is passed here).
n_rows, n_cols = 2, 4
fig, axes = plt.subplots(n_rows, n_cols, figsize=(10, 7))
for i in range(n_rows):
    for j in range(n_cols):
        # Row-major position in the grid: the row stride must equal the number
        # of columns, otherwise a sample is repeated and the last one skipped.
        index = i * n_cols + j
        if index < len(train_dataset):
            image, label, file_path, data = train_dataset[index]
            axes[i, j].imshow(image)
            # target == 1 marks a malignant lesion, 0 a benign one.
            if label.numpy() == 1:
                axes[i, j].set_title("Malignant", color="r")
            else:
                axes[i, j].set_title("Benign", color="g")
        # Hide the frame of every cell, including cells left empty.
        axes[i, j].axis('off')
plt.tight_layout()
plt.show()
# Open the test image archive read-only and hand it to the dataset.
hdf = h5py.File('/kaggle/input/isic-2024/isic-2024/test-image.hdf5', mode='r')
test_dataset = TestDataset(test, hdf, meta_features)

# Preview up to three test samples in a single row.
n_preview = 3
fig, axes = plt.subplots(1, n_preview, figsize=(10, 7))  # 1 row, 3 columns
for i in range(n_preview):
    # Bound the index by the dataset actually being indexed (the test set).
    if i < len(test_dataset):
        image, data = test_dataset[i]
        axes[i].imshow(image)
    axes[i].axis('off')
plt.tight_layout()
plt.show()
/kaggle/input/isic-2024/isic-2024/train-image/image/ISIC_0015657.jpg /kaggle/input/isic-2024/isic-2024/train-image/image/ISIC_0015729.jpg /kaggle/input/isic-2024/isic-2024/train-image/image/ISIC_0015740.jpg
import albumentations as A
from typing import Any, Callable, Literal, Tuple, cast
from PIL import Image, ImageDraw, ImageFilter
class DrawHair(A.ImageOnlyTransform):
    """Overlay synthetic hair strands on an image.

    A pool of RGBA hair patterns is drawn once at construction time. Each call
    to :meth:`apply` alpha-composites one randomly chosen pattern, resized to
    the input image, on top of that image.
    """

    def __init__(self, count=10, size=300, p=0.5):
        """
        Initializes the DrawHair class with the specified number of pre-generated
        hair images, image size, and probability of applying the transformation.
        Args:
            count (int, optional): Number of pre-generated hair images. Defaults to 10.
            size (int, optional): Side length in pixels of the square hair images. Defaults to 300.
            p (float, optional): Probability of applying the transformation. Defaults to 0.5.
        """
        super().__init__(always_apply=False, p=p)  # p adds the probabilistic behaviour
        self.count = count
        self.size = size
        # RGB base colours a pattern can be drawn in.
        self.colors = [
            (35, 18, 11),
            (61, 35, 20),
            (90, 56, 37),
            (44, 22, 8),
            (21, 15, 8),
        ]
        self.premade_hairs = [self.generate_hairs() for _ in range(count)]

    def generate_hairs(self):
        """
        Generates a single hair pattern image. The number of hairs, their color,
        and the direction of each hair are randomized to create a unique pattern.
        Returns:
            Image: An RGBA image of ``size`` x ``size`` containing the generated hair pattern.
        """
        num_hairs = random.choice([0, 0, 50, 100, 150, 200, 300, 400, 500, 600])
        color = list(random.choice(self.colors))
        # Draw on a canvas 200 px larger than the target and shrink at the end.
        width, height = (self.size + 200, self.size + 200)
        hair_img = Image.new('RGBA', (width, height), tuple(color + [0]))
        draw = ImageDraw.Draw(hair_img)
        for _ in range(num_hairs):
            direction = random.choice([[1, 1], [-1, 1], [1, -1]])
            hair_color = tuple(color + [random.randint(70, 180)])
            origin = random.randint(-width, width), random.randint(-height, height)
            end0 = random.randint(100, int(width * 1.5)) * direction[0]
            end1 = random.randint(100, int(height * 1.5)) * direction[1]
            end = (origin[0] + end0, origin[1] + end1)
            # Bounding box of the ellipse the arc is cut from.
            left, top = min(origin[0], end[0]), min(origin[1], end[1])
            right, bottom = max(origin[0], end[0]), max(origin[1], end[1])
            angles = (-random.randint(0, 70), random.randint(0, 70))
            draw.arc([left, top, right, bottom], angles[0], angles[1],
                     fill=hair_color, width=random.choice([1, 2, 3]))
        hair_img = hair_img.filter(ImageFilter.GaussianBlur(radius=1))
        return hair_img.resize((self.size, self.size), resample=Image.LANCZOS)

    def apply(self, image: np.ndarray, **config: Any):
        """
        Applies a random pre-generated hair pattern onto the given image.
        Args:
            image (np.ndarray): The input image onto which the hair pattern will be applied.
                Pixel values are cast to uint8, so a 0-255 value range is expected.
        Returns:
            np.ndarray: uint8 RGB image with the overlaid hair pattern.
        """
        # Cast straight to uint8; an intermediate float32 copy adds nothing.
        img_pil = Image.fromarray(np.asarray(image).astype(np.uint8)).convert('RGBA')
        random_hairs = random.choice(self.premade_hairs).resize(img_pil.size, resample=Image.LANCZOS)
        out = Image.alpha_composite(img_pil, random_hairs).convert('RGB')
        return np.array(out)

    def get_transform_init_args_names(self):
        """Constructor arguments Albumentations needs to describe this transform."""
        return ("count", "size")
class GridMask(A.ImageOnlyTransform):
    """
    GridMask augmentation as an Albumentations transform.

    A regular grid of rectangular patches is multiplied into the image: pixels
    under a patch are scaled by ``fill_value``, all other pixels by 1.

    Parameters:
    - num_grid (int or tuple of int): Number of grid cells per image side, or an
      inclusive (minimum, maximum) range; one grid size is drawn per call.
    - fill_value (float): Multiplicative factor written into the masked patches
      (default 0, i.e. masked pixels become 0 / black).
    - rotate (int or tuple of int): Maximum absolute rotation in degrees, or a
      (minimum, maximum) range of rotation angles for the mask.
    - mode (int): 0 masks the top-left quarter of every grid cell, 1 uses the
      inverse of that mask (``1 - mask``), 2 additionally masks the bottom-right
      quarter of every cell.
    - always_apply (bool): Accepted for signature compatibility; the value is
      not forwarded to the base class.
    - p (float): Probability of applying the transform.
    """

    def __init__(self, num_grid=3, fill_value=0, rotate=0, mode=0, always_apply=False, p=0.5):
        super().__init__(always_apply=False, p=p)
        if isinstance(num_grid, int):
            num_grid = (num_grid, num_grid)
        if isinstance(rotate, int):
            rotate = (-rotate, rotate)
        self.num_grid = num_grid
        self.fill_value = fill_value
        self.rotate = rotate
        self.mode = mode
        self.masks = None
        self.rand_h_max = []
        self.rand_w_max = []
        # Image size the cached masks were built for.
        self._mask_size = None

    def init_masks(self, height, width):
        """Build the masks for ``height`` x ``width`` images, one per grid size.

        Masks are cached and only rebuilt when the requested size changes. Each
        mask is one grid cell larger than the image in both directions so that a
        randomly shifted image-sized window can be cut from it.
        """
        if self.masks is not None and self._mask_size == (height, width):
            return
        self.masks = []
        self.rand_h_max = []
        self.rand_w_max = []
        self._mask_size = (height, width)
        for n_g in range(self.num_grid[0], self.num_grid[1] + 1):
            grid_h = height / n_g  # height of one grid cell
            grid_w = width / n_g  # width of one grid cell
            this_mask = np.ones((int((n_g + 1) * grid_h), int((n_g + 1) * grid_w)), dtype='float32')
            for i in range(n_g + 1):
                for j in range(n_g + 1):
                    this_mask[
                        int(i * grid_h): int(i * grid_h + grid_h / 2),
                        int(j * grid_w): int(j * grid_w + grid_w / 2)
                    ] = self.fill_value
                    if self.mode == 2:
                        this_mask[
                            int(i * grid_h + grid_h / 2): int(i * grid_h + grid_h),
                            int(j * grid_w + grid_w / 2): int(j * grid_w + grid_w)
                        ] = self.fill_value
            if self.mode == 1:
                this_mask = 1 - this_mask
            self.masks.append(this_mask)
            self.rand_h_max.append(grid_h)
            self.rand_w_max.append(grid_w)

    def apply(self, img, **params):
        """Multiply ``img`` by a randomly rotated and shifted grid mask.

        Works for (H, W) and (H, W, C) arrays of any aspect ratio and returns a
        float32 array of the same shape.
        """
        height, width = img.shape[:2]
        self.init_masks(height, width)
        # Draw a grid size only when there is a choice, so the random stream of
        # the single-grid configuration stays as it was.
        mask_id = random.randrange(len(self.masks)) if len(self.masks) > 1 else 0
        mask = self.masks[mask_id]

        # Randomly rotate the mask; a zero angle leaves it untouched.
        angle = random.uniform(self.rotate[0], self.rotate[1])
        if angle != 0:
            mask = self._rotate_array(mask, angle)

        # Cut a randomly shifted, image-sized window out of the larger mask.
        top = random.randint(0, mask.shape[0] - height)
        left = random.randint(0, mask.shape[1] - width)
        window = mask[top:top + height, left:left + width]

        img = np.asarray(img, dtype=np.float32)
        if img.ndim == 3:
            # Broadcast the single-channel mask over every image channel.
            window = window[:, :, None]
        return img * window

    @staticmethod
    def _rotate_array(mask, angle):
        """Rotate an (H, W[, C]) array by ``angle`` degrees about its centre."""
        center = (mask.shape[1] / 2, mask.shape[0] / 2)
        rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
        return cv2.warpAffine(mask, rotation_matrix, (mask.shape[1], mask.shape[0]))

    def rotate_mask(self, mask, angle):
        """Rotate a (C, H, W) tensor mask by ``angle`` degrees; returns a tensor."""
        rotated_mask = self._rotate_array(mask.numpy().transpose(1, 2, 0), angle)
        return torch.tensor(rotated_mask.transpose(2, 0, 1))

    def get_transform_init_args_names(self):
        """Constructor arguments Albumentations needs to describe this transform."""
        return ("num_grid", "fill_value", "rotate", "mode")
from skimage.color import rgb2hsv, rgb2gray, rgb2yuv
from skimage import color, exposure, transform
from skimage.exposure import equalize_hist
from albumentations import RandomCrop
from scipy.fftpack import dct, idct
def isotropically_resize_image(img, size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC):
    """Resize ``img`` so its longer side equals ``size``, keeping the aspect ratio.

    Args:
        img: Array whose first two dimensions are (height, width).
        size: Target length of the longer side in pixels.
        interpolation_down: OpenCV interpolation flag used when shrinking.
        interpolation_up: OpenCV interpolation flag used when enlarging.

    Returns:
        ``img`` itself when its longer side already equals ``size``; otherwise a
        resized copy that has been cast to uint8.
    """
    h, w = img.shape[:2]
    if max(w, h) == size:
        return img
    if w > h:
        scale = size / w
        new_w, new_h = int(size), int(h * scale)
    else:
        scale = size / h
        new_w, new_h = int(w * scale), int(size)
    # A very elongated image can truncate to a zero-length side, which
    # cv2.resize rejects; keep at least one pixel.
    new_w, new_h = max(1, new_w), max(1, new_h)
    interpolation = interpolation_up if scale > 1 else interpolation_down
    img = img.astype('uint8')
    return cv2.resize(img, (new_w, new_h), interpolation=interpolation)
class IsotropicResize(A.DualTransform):
    """Aspect-preserving resize that brings the longer image side to ``max_side``.

    Args:
        max_side: Target length of the longer side in pixels.
        interpolation_down: OpenCV interpolation flag used when shrinking images.
        interpolation_up: OpenCV interpolation flag used when enlarging images.
        always_apply: Forwarded to the Albumentations base class.
        p: Probability of applying the transform.
    """

    def __init__(self, max_side, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC,
                 always_apply=False, p=1):
        super(IsotropicResize, self).__init__(always_apply, p)
        self.max_side = max_side
        self.interpolation_down = interpolation_down
        self.interpolation_up = interpolation_up

    def apply(self, img, interpolation_down=None, interpolation_up=None, **params):
        """Resize an image; omitted flags fall back to the ones given at construction."""
        # Compare against None explicitly: cv2.INTER_NEAREST is 0 and therefore falsy.
        if interpolation_down is None:
            interpolation_down = self.interpolation_down
        if interpolation_up is None:
            interpolation_up = self.interpolation_up
        return isotropically_resize_image(img, size=self.max_side, interpolation_down=interpolation_down,
                                          interpolation_up=interpolation_up)

    def apply_to_mask(self, img, **params):
        """Resize a mask with nearest-neighbour interpolation in both directions."""
        return self.apply(img, interpolation_down=cv2.INTER_NEAREST, interpolation_up=cv2.INTER_NEAREST, **params)

    def get_transform_init_args_names(self):
        return ("max_side", "interpolation_down", "interpolation_up")
class Resize4xAndBack(A.ImageOnlyTransform):
    """Degrade an image by shrinking it 2x or 4x and scaling it back up.

    The output has the same height and width as the input.
    """

    def __init__(self, always_apply=False, p=0.5):
        super(Resize4xAndBack, self).__init__(always_apply, p)

    def apply(self, img, **params):
        h, w = img.shape[:2]
        scale = random.choice([2, 4])
        # Keep at least one pixel per side so tiny inputs do not produce an
        # empty target size, which cv2.resize rejects.
        small_size = (max(1, w // scale), max(1, h // scale))
        img = cv2.resize(img, small_size, interpolation=cv2.INTER_AREA)
        img = cv2.resize(img, (w, h),
                         interpolation=random.choice([cv2.INTER_CUBIC, cv2.INTER_LINEAR, cv2.INTER_NEAREST]))
        return img

    def get_transform_init_args_names(self):
        """This transform has no constructor arguments of its own."""
        return ()
def get_transforms(*, data):
    """Build the Albumentations pipeline for one data split.

    Args:
        data: 'train' selects the augmentation pipeline, 'valid' the plain
            resize-and-normalise pipeline.

    Returns:
        An ``A.Compose`` for 'train' or 'valid'; ``None`` for any other value.
    """
    image_size = CFG.size

    def model_input_steps():
        # Per-channel normalisation, then conversion to a tensor.
        return [
            Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2(),
        ]

    if data == 'valid':
        # Disabled alternatives kept for reference:
        # IsotropicResize(max_side=image_size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC),
        # PadIfNeeded(min_height=size, min_width=size, border_mode=cv2.BORDER_CONSTANT, value=0),
        return A.Compose([A.Resize(image_size, image_size)] + model_input_steps())

    if data != 'train':
        return None

    augmentations = [
        # A.OneOf([
        #     IsotropicResize(max_side=image_size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC),
        #     IsotropicResize(max_side=image_size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_LINEAR),
        #     IsotropicResize(max_side=image_size, interpolation_down=cv2.INTER_LINEAR, interpolation_up=cv2.INTER_LINEAR),
        #     # CustomRandomCrop(size=image_size)
        # ], p=1),
        DrawHair(count=3),
        # flips / transposition
        A.Transpose(p=0.5),
        A.VerticalFlip(p=0.5),
        A.HorizontalFlip(p=0.5),
        # contrast
        A.OneOf(
            [
                A.RandomToneCurve(scale=0.3, p=0.5),
                A.RandomBrightnessContrast(
                    brightness_limit=(-0.1, 0.2),
                    contrast_limit=(-0.4, 0.5),
                    brightness_by_max=True,
                    always_apply=False,
                    p=0.5,
                ),
            ],
            p=0.5,
        ),
        # blur / noise
        A.OneOf(
            [
                A.MotionBlur(blur_limit=5),
                A.MedianBlur(blur_limit=5),
                A.GaussianBlur(blur_limit=5),
                A.GaussNoise(var_limit=(5.0, 30.0)),
            ],
            p=0.7,
        ),
        # geometric distortion
        A.OneOf(
            [
                A.OpticalDistortion(distort_limit=1.0),
                A.GridDistortion(num_steps=5, distort_limit=1.0),
                A.ElasticTransform(alpha=3),
            ],
            p=0.7,
        ),
        A.CLAHE(clip_limit=4.0, p=0.7),
        A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=20, val_shift_limit=10, p=0.5),
        A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=15, border_mode=0, p=0.85),
        A.Resize(image_size, image_size),
        # A.Cutout(max_h_size=int(image_size * 0.375), max_w_size=int(image_size * 0.375), num_holes=1, p=0.7),
        GridMask(num_grid=3, fill_value=0, rotate=0, mode=0, p=0.3),
    ]
    return A.Compose(augmentations + model_input_steps())
hdf_train = h5py.File('/kaggle/input/isic-2024/isic-2024/train-image.hdf5', mode='r')
train_dataset = TrainDataset(train, hdf5=hdf_train, meta_features=meta_features,
                             transform=get_transforms(data="train"))

# Each sample is (image, label, file_path, data).
# The train pipeline ends with Normalize + ToTensorV2, so undo the
# normalisation before display; otherwise imshow clips most of the range.
norm_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
norm_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

n_rows, n_cols = 2, 4
fig, axes = plt.subplots(n_rows, n_cols, figsize=(10, 7))
for i in range(n_rows):
    for j in range(n_cols):
        # Row-major position in the grid: the row stride must equal the number
        # of columns, otherwise a sample is repeated and the last one skipped.
        index = i * n_cols + j
        if index < len(train_dataset):
            image, label, file_path, data = train_dataset[index]
            # (C, H, W) tensor -> (H, W, C) array with values back in [0, 1].
            preview = image.permute(1, 2, 0).numpy() * norm_std + norm_mean
            axes[i, j].imshow(np.clip(preview, 0.0, 1.0))
            # target == 1 marks a malignant lesion, 0 a benign one.
            if label.numpy() == 1:
                axes[i, j].set_title("Malignant", color="r")
            else:
                axes[i, j].set_title("Benign", color="g")
        axes[i, j].axis('off')
plt.tight_layout()
plt.show()
# Assign every row to a validation fold, stratified by target and grouped by
# patient so that a patient's images never span train and validation.
folds = train.copy()
Fold = StratifiedGroupKFold(n_splits=CFG.n_fold, shuffle=True, random_state=CFG.seed)
folds['fold'] = -1
fold_col = folds.columns.get_loc('fold')
for n, (train_index, val_index) in enumerate(
        Fold.split(folds, folds[CFG.target_col], groups=folds["patient_id"])):
    # split() yields positional row indices, so assign by position; label-based
    # .loc is only equivalent when the frame has a default 0..n-1 index.
    folds.iloc[val_index, fold_col] = n
folds['fold'] = folds['fold'].astype(int)
# Show the class balance inside each fold.
for fold in folds['fold'].unique():
    print(folds[folds['fold'] == fold]['target'].value_counts())
target 0 83272 1 89 Name: count, dtype: int64 target 0 71081 1 83 Name: count, dtype: int64 target 0 77585 1 60 Name: count, dtype: int64 target 0 81512 1 83 Name: count, dtype: int64 target 0 87216 1 78 Name: count, dtype: int64
class CustomResNext(nn.Module):
    """timm image backbone with an optional metadata MLP and a linear head.

    Args:
        model: Currently ignored; the backbone name is read from ``CFG.model_name``.
        out_dim: Number of outputs of the final linear layer.
        num_classes: Forwarded to ``timm.create_model``. The head input width is
            taken from ``extractor.num_features``, so this presumably has to
            stay 0 (pooled features, no timm classifier) - TODO confirm.
        n_meta_features: Width of the metadata vector; 0 disables the metadata branch.
        n_meta_dim: Three hidden widths of the metadata MLP.
        pretrained: Forwarded to ``timm.create_model``.
    """

    def __init__(self, model='resnext50_32x4d', out_dim=2, num_classes=0, n_meta_features=0,
                 n_meta_dim=(512, 256, 128), pretrained=False):
        super().__init__()
        # num_classes=0 typically means you are not attaching a classification
        # head to the model, which is often done when you want to use the model
        # as a feature extractor.
        self.extractor = timm.create_model(
            CFG.model_name,
            pretrained=pretrained,
            drop_rate=0.1,
            drop_path_rate=0.2,
            num_classes=num_classes,
            global_pool='avg',
        )
        # Independent dropout modules whose head outputs are averaged in forward().
        self.dropouts = nn.ModuleList([
            nn.Dropout(0.5) for _ in range(5)
        ])
        in_ch = self.extractor.num_features
        self.n_meta_features = n_meta_features
        self.n_meta_dim = n_meta_dim
        if n_meta_features > 0:
            self.meta = nn.Sequential(
                nn.Linear(n_meta_features, n_meta_dim[0]),
                nn.BatchNorm1d(int(n_meta_dim[0])),
                nn.SiLU(),
                nn.Dropout(p=0.5),
                nn.Linear(n_meta_dim[0], n_meta_dim[1]),
                nn.BatchNorm1d(n_meta_dim[1]),
                nn.SiLU(),
                nn.Dropout(p=0.5),
                nn.Linear(n_meta_dim[1], n_meta_dim[2]),
                nn.BatchNorm1d(n_meta_dim[2]),
                nn.SiLU(),
                nn.Dropout(p=0.5)
            )
            # The head sees image features concatenated with the metadata embedding.
            in_ch += n_meta_dim[2]
        self.out = nn.Linear(in_ch, out_dim)
        # self.sigmoid = nn.Sigmoid()

    def extract(self, x):
        """Run the image backbone and return its output."""
        return self.extractor(x)

    def forward(self, x, x_meta):
        """Return ``out_dim`` raw outputs per sample (no softmax/sigmoid applied).

        Args:
            x: Image batch passed to the backbone.
            x_meta: Metadata batch; only used when ``n_meta_features > 0``.
        """
        features = self.extract(x)
        if self.n_meta_features > 0:
            features = torch.cat((features, self.meta(x_meta)), dim=1)
        # Multi-sample dropout: every dropout module sees the SAME features and
        # the head outputs are averaged. Feeding each dropout the running sum
        # instead compounds the masks and rescales the features. In eval mode
        # this reduces to self.out(features).
        logits = self.out(self.dropouts[0](features))
        for dropout in self.dropouts[1:]:
            logits = logits + self.out(dropout(features))
        # may add sigmoid if out = 1
        return logits / len(self.dropouts)
# # Calling the model now will cause memory fragmentation when using accelerator
# model = CustomResNext(num_classes = 0 ,n_meta_features=len(meta_features), pretrained=False)
# model(train_dataset[0][0].unsqueeze(1).permute(1,0,2,3) , train_dataset[0][3][1])
# Release cached CUDA memory and print the allocator report. Skipped when no
# GPU is present, because memory_summary() raises on CPU-only hosts.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    print(torch.cuda.memory_summary())
|===========================================================================| | PyTorch CUDA memory summary, device ID 0 | |---------------------------------------------------------------------------| | CUDA OOMs: 0 | cudaMalloc retries: 0 | |===========================================================================| | Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed | |---------------------------------------------------------------------------| | Allocated memory | 0 B | 0 B | 0 B | 0 B | | from large pool | 0 B | 0 B | 0 B | 0 B | | from small pool | 0 B | 0 B | 0 B | 0 B | |---------------------------------------------------------------------------| | Active memory | 0 B | 0 B | 0 B | 0 B | | from large pool | 0 B | 0 B | 0 B | 0 B | | from small pool | 0 B | 0 B | 0 B | 0 B | |---------------------------------------------------------------------------| | Requested memory | 0 B | 0 B | 0 B | 0 B | | from large pool | 0 B | 0 B | 0 B | 0 B | | from small pool | 0 B | 0 B | 0 B | 0 B | |---------------------------------------------------------------------------| | GPU reserved memory | 0 B | 0 B | 0 B | 0 B | | from large pool | 0 B | 0 B | 0 B | 0 B | | from small pool | 0 B | 0 B | 0 B | 0 B | |---------------------------------------------------------------------------| | Non-releasable memory | 0 B | 0 B | 0 B | 0 B | | from large pool | 0 B | 0 B | 0 B | 0 B | | from small pool | 0 B | 0 B | 0 B | 0 B | |---------------------------------------------------------------------------| | Allocations | 0 | 0 | 0 | 0 | | from large pool | 0 | 0 | 0 | 0 | | from small pool | 0 | 0 | 0 | 0 | |---------------------------------------------------------------------------| | Active allocs | 0 | 0 | 0 | 0 | | from large pool | 0 | 0 | 0 | 0 | | from small pool | 0 | 0 | 0 | 0 | |---------------------------------------------------------------------------| | GPU reserved segments | 0 | 0 | 0 | 0 | | from large pool | 0 | 0 | 0 | 0 | | from small pool | 0 | 0 | 0 | 0 | 
|---------------------------------------------------------------------------| | Non-releasable allocs | 0 | 0 | 0 | 0 | | from large pool | 0 | 0 | 0 | 0 | | from small pool | 0 | 0 | 0 | 0 | |---------------------------------------------------------------------------| | Oversize allocations | 0 | 0 | 0 | 0 | |---------------------------------------------------------------------------| | Oversize GPU segments | 0 | 0 | 0 | 0 | |===========================================================================|
import wandb
# Log in to Weights & Biases with the key stored in Kaggle secrets under the
# label "wandb_api"; fall back to anonymous mode when that is not possible.
try:
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    api_key = user_secrets.get_secret("wandb_api")
    wandb.login(key=api_key)
    anonymous = None
except Exception:
    # Best-effort login: any failure (not on Kaggle, secret missing, login
    # rejected) switches to anonymous mode. Catching Exception rather than
    # using a bare except lets KeyboardInterrupt/SystemExit through.
    anonymous = "must"
    print('To use your W&B account,\nGo to Add-ons -> Secrets and provide your W&B access token. Use the Label name as wandb_api. \nGet your W&B access token from here: https://wandb.ai/authorize')
wandb: W&B API key is configured. Use `wandb login --relogin` to force relogin wandb: WARNING If you're specifying your api key in code, ensure this code is not shared publicly. wandb: WARNING Consider setting the WANDB_API_KEY environment variable, or running `wandb login` from the command line. wandb: Appending key for api.wandb.ai to your netrc file: /root/.netrc
# Switch Weights & Biases tracking on and start a run named after the model,
# the first training fold and the epoch count.
CFG.WandB = True
if CFG.WandB:
    run = wandb.init(
        entity='lassouedaymenla',
        project=CFG.competition,
        save_code=True,
        name=f"{CFG.model_name}_Fd_{CFG.trn_fold[0]}_ek_{CFG.epochs}",
    )
wandb: Currently logged in as: lassouedaymenla. Use `wandb login --relogin` to force relogin wandb: wandb version 0.17.7 is available! To upgrade, please run: wandb: $ pip install wandb --upgrade wandb: Tracking run with wandb version 0.17.4 wandb: Run data is saved locally in /kaggle/working/wandb/run-20240820_014056-f7534ypo wandb: Run `wandb offline` to turn off syncing. wandb: Syncing run tf_efficientnetv2_m.in21k_Fd_2_ek_10 wandb: ⭐️ View project at https://wandb.ai/lassouedaymenla/ISIC_2024 wandb: 🚀 View run at https://wandb.ai/lassouedaymenla/ISIC_2024/runs/f7534ypo
# torch.cuda.empty_cache()
class AverageMeter(object):
    """Computes and stores the average and current value.

    Attributes set by ``reset``/``update``: ``val`` (last value), ``sum``
    (weighted sum), ``count`` (total weight) and ``avg`` (``sum / count``).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` observed ``n`` times (e.g. a batch mean and its batch size)."""
        self.val = val
        self.sum += val * n
        self.count += n
        # An empty meter updated with n=0 has no samples yet; report 0 instead
        # of dividing by zero.
        self.avg = self.sum / self.count if self.count else 0
def asMinutes(s):
    """Format a duration in seconds as '<minutes>m <seconds>s' (fractions truncated)."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return f"{int(whole_minutes)}m {int(leftover_seconds)}s"
def timeSince(since, percent):
    """Return '<elapsed> (remain <remaining>)' for work started at ``since``.

    ``since`` is a ``time.time()`` timestamp and ``percent`` the completed
    fraction of the work; the total duration is extrapolated as
    ``elapsed / percent``.
    """
    elapsed = time.time() - since
    remaining = elapsed / percent - elapsed
    return f"{asMinutes(elapsed)} (remain {asMinutes(remaining)})"
def train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, accelerator):
    """Train ``model`` for one epoch and return the batch-size-weighted mean loss.

    Relies on the module-level ``CFG``, ``device``, ``wandb``, ``AverageMeter``
    and ``timeSince``.

    Args:
        train_loader: Yields ``(images, labels, file_name, data)`` batches;
            ``data[1]`` is passed to the model as its metadata input.
        model: Called as ``model(images, meta)``.
        criterion: Called as ``criterion(y_preds, labels)``.
        optimizer: Stepped once every ``CFG.gradient_accumulation_steps`` batches.
        epoch: Zero-based epoch index, used only in the progress line.
        scheduler: Only queried for the current learning rate; not stepped here.
        accelerator: Not used by this function.

    Returns:
        float: Average loss over the epoch, weighted by batch size.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    # switch to train mode
    model.train()
    start = end = time.time()
    for step, (images, labels, file_name, data) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        meta = data[1].to(device)
        y_preds = model(images, meta)
        loss = criterion(y_preds, labels)
        # record the loss before it is scaled for accumulation
        losses.update(loss.item(), batch_size)
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        if CFG.apex:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
        # Step only at an accumulation boundary. An extra unconditional
        # step()/zero_grad() here would discard the accumulated gradients and
        # apply a second update per batch.
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        # get_last_lr() reports the rate currently in use; get_lr() is only
        # meant to be called from inside scheduler.step().
        current_lr = scheduler.get_last_lr()[0]
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Grad: {grad_norm:.4f} '
                  'LR: {lr:.6f} '
                  .format(
                      epoch+1, step, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses,
                      remain=timeSince(start, float(step+1)/len(train_loader)),
                      grad_norm=grad_norm,
                      lr=current_lr,
                  ))
        # NOTE(review): logged on every step; the source layout did not show
        # whether this belonged under the print condition - confirm.
        if CFG.WandB:
            wandb.log({
                "Train Loss": losses.val,
                "Step": step,
                "Gradient Norm": grad_norm,
                "Learning Rate": current_lr,
            })
    return losses.avg
def valid_fn(valid_loader, model, criterion, accelerator):
    """Evaluate ``model`` on ``valid_loader``.

    Relies on the module-level ``CFG``, ``device``, ``wandb``, ``AverageMeter``
    and ``timeSince``.

    Args:
        valid_loader: Yields ``(images, labels, file_name, data)`` batches;
            ``data[1]`` is passed to the model as its metadata input.
        model: Called as ``model(images, meta)``.
        criterion: Called as ``criterion(y_preds, labels)``.
        accelerator: Not used by this function.

    Returns:
        tuple: ``(mean_loss, predictions)`` where ``mean_loss`` is weighted by
        batch size and ``predictions`` is the row-wise concatenation of the
        per-batch softmax outputs as a NumPy array.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    model.eval()
    preds = []
    start = end = time.time()
    for step, (images, labels, file_name, data) in enumerate(valid_loader):
        data_time.update(time.time() - end)
        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        meta = data[1].to(device)
        with torch.no_grad():
            y_preds = model(images, meta)
            # Labels were moved to `device` above and the predictions come from
            # the model, so no hard-coded .cuda() transfer is needed; that call
            # fails on CPU-only hosts.
            loss = criterion(y_preds, labels)
        losses.update(loss.item(), batch_size)
        y_preds = torch.nn.functional.softmax(y_preds, dim=1)
        preds.append(y_preds.to('cpu').numpy())
        batch_time.update(time.time() - end)
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('EVAL: [{0}/{1}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(
                      step, len(valid_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses,
                      remain=timeSince(start, float(step+1)/len(valid_loader)),
                  ))
        # NOTE(review): logged on every step; the source layout did not show
        # whether this belonged under the print condition - confirm.
        if CFG.WandB:
            wandb.log({
                "Val Loss ": losses.val,
                "Val Step": step,
            })
    predictions = np.concatenate(preds)
    return losses.avg, predictions
# Optional sanity check of the pAUC metric on a five-row toy example.
check_pAUC = False
if check_pAUC:
    # NOTE(review): `score` is not defined in the visible part of this file
    # (the metric defined further down is named `score_pAUC`) - confirm which
    # function is meant before enabling this check.
    row_id_column_name = 'id'
    solution_data = dict(id=[1, 2, 3, 4, 5], target=[1, 0, 1, 0, 1])
    submission_data = dict(id=[1, 2, 3, 4, 5], prediction=[0.9, 0.1, 0.8, 0.2, 0.7])
    # Wrap the toy data in DataFrames, as the scorer expects.
    solution_df = pd.DataFrame(solution_data)
    submission_df = pd.DataFrame(submission_data)
    pauc_score = score(solution_df, submission_df, row_id_column_name)
    print(f'pAUC Score: {pauc_score:.4f}')
import numpy as np
import pandas as pd
import pandas.api.types
from sklearn.metrics import roc_curve, auc, roc_auc_score
class ParticipantVisibleError(Exception):
    """Raised by the metric code when a submission is invalid (e.g. non-numeric)."""
def score_pAUC(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str, min_tpr: float=0.80) -> float:
    '''
    2024 ISIC Challenge metric: pAUC
    Given a solution file and submission file, this function returns the
    the partial area under the receiver operating characteristic (pAUC)
    above a given true positive rate (TPR) = 0.80.
    https://en.wikipedia.org/wiki/Partial_Area_Under_the_ROC_Curve.
    (c) 2024 Nicholas R Kurtansky, MSKCC
    Args:
        solution: ground truth pd.DataFrame of 1s and 0s
        submission: solution dataframe of predictions of scores ranging [0, 1]
        row_id_column_name: name of the id column present in both frames; it is
            ignored for scoring
        min_tpr: lower TPR bound of the region whose area is measured
    Returns:
        Float value range [0, max_fpr]
    Raises:
        ParticipantVisibleError: if the submission values are not numeric
        ValueError: if min_tpr lies outside [0, 1)
    '''
    # Work on copies without the id column so the caller's frames are left
    # intact and the function can be called repeatedly on the same inputs.
    solution = solution.drop(columns=[row_id_column_name])
    submission = submission.drop(columns=[row_id_column_name])
    # check submission is numeric
    if not pandas.api.types.is_numeric_dtype(submission.values):
        raise ParticipantVisibleError('Submission target column must be numeric')
    # rescale the target. set 0s to 1s and 1s to 0s (since sklearn only has max_fpr)
    v_gt = abs(np.asarray(solution.values)-1)
    # flip the submissions to their complements
    v_pred = -1.0*np.asarray(submission.values)
    max_fpr = abs(1-min_tpr)
    # using sklearn.metric functions: (1) roc_curve and (2) auc
    fpr, tpr, _ = roc_curve(v_gt, v_pred, sample_weight=None)
    if max_fpr is None or max_fpr == 1:
        return auc(fpr, tpr)
    if max_fpr <= 0 or max_fpr > 1:
        raise ValueError("Expected min_tpr in range [0, 1), got: %r" % min_tpr)
    # Add a single point at max_fpr by linear interpolation
    stop = np.searchsorted(fpr, max_fpr, "right")
    x_interp = [fpr[stop - 1], fpr[stop]]
    y_interp = [tpr[stop - 1], tpr[stop]]
    tpr = np.append(tpr[:stop], np.interp(max_fpr, x_interp, y_interp))
    fpr = np.append(fpr[:stop], max_fpr)
    partial_auc = auc(fpr, tpr)
    # # Equivalent code that uses sklearn's roc_auc_score
    # v_gt = abs(np.asarray(solution.values)-1)
    # v_pred = np.array([1.0 - x for x in submission.values])
    # max_fpr = abs(1-min_tpr)
    # partial_auc_scaled = roc_auc_score(v_gt, v_pred, max_fpr=max_fpr)
    # # change scale from [0.5, 1.0] to [0.5 * max_fpr**2, max_fpr]
    # # https://math.stackexchange.com/questions/914823/shift-numbers-into-a-different-range
    # partial_auc = 0.5 * max_fpr**2 + (max_fpr - 0.5 * max_fpr**2) / (1.0 - 0.5) * (partial_auc_scaled - 0.5)
    return partial_auc
def train_loop(folds, fold):
    """Train and validate one cross-validation fold, checkpointing the best epoch.

    Rows of ``folds`` whose ``fold`` column equals ``fold`` form the validation
    split. The remaining rows are rebalanced (positives upsampled to 3000 with
    replacement, negatives downsampled to 10000 without replacement) and used
    for training. After every epoch the validation ROC AUC and the competition
    pAUC are computed; whenever the pAUC improves, the model weights, the
    validation probabilities and an out-of-fold CSV are written under
    ``OUTPUT_DIR``.

    Args:
        folds: DataFrame holding at least the ``fold``, ``target`` and
            ``isic_id`` columns plus the ``CFG.target_col`` column.
        fold: Identifier of the fold held out for validation.

    Returns:
        None. Results are persisted to disk and reported via LOGGER / wandb.

    Raises:
        ValueError: If ``CFG.scheduler`` is not one of the supported names.
    """
    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    trn_idx = folds[folds['fold'] != fold].index
    val_idx = folds[folds['fold'] == fold].index
    train_folds = folds.loc[trn_idx].reset_index(drop=True)
    valid_folds = folds.loc[val_idx].reset_index(drop=True)

    # Over3000Down10000: rebalance the training split only; the validation
    # split keeps its original class distribution.
    df_minority = train_folds[train_folds['target'] == 1]
    df_majority = train_folds[train_folds['target'] == 0]
    df_minority_upsampled = resample(df_minority,
                                     replace=True,      # sample with replacement
                                     n_samples=3000,    # upsample to 3000
                                     random_state=42)   # for reproducibility
    df_majority_downsampled = resample(df_majority,
                                       replace=False,     # sample without replacement
                                       n_samples=10000,   # downsample to 10000
                                       random_state=42)   # for reproducibility
    train_resampled = pd.concat([df_minority_upsampled, df_majority_downsampled])
    train_resampled = train_resampled.sample(frac=1, random_state=42).reset_index(drop=True)
    print(train_resampled['target'].value_counts())
    train_folds = train_resampled

    # ====================================================
    # scheduler
    # ====================================================
    def get_scheduler(optimizer):
        """Build the LR scheduler selected by ``CFG.scheduler``."""
        if CFG.scheduler == 'ReduceLROnPlateau':
            return ReduceLROnPlateau(optimizer, mode='min', factor=CFG.factor,
                                     patience=CFG.patience, verbose=True, eps=CFG.eps)
        if CFG.scheduler == 'CosineAnnealingLR':
            return CosineAnnealingLR(optimizer, T_max=CFG.T_max, eta_min=CFG.min_lr, last_epoch=-1)
        if CFG.scheduler == 'CosineAnnealingWarmRestarts':
            return CosineAnnealingWarmRestarts(optimizer, T_0=CFG.T_0, T_mult=1,
                                               eta_min=CFG.min_lr, last_epoch=-1)
        # Fail with a clear message instead of an UnboundLocalError.
        raise ValueError(f"Unsupported CFG.scheduler: {CFG.scheduler!r}")

    # The HDF5 handle is shared by both datasets; the context manager makes
    # sure it is closed once the fold finishes (or fails).
    with h5py.File('/kaggle/input/isic-2024/isic-2024/train-image.hdf5', mode='r') as hdf_train:
        train_dataset = TrainDataset(train_folds, hdf5=hdf_train, meta_features=meta_features,
                                     transform=get_transforms(data="train"))
        valid_dataset = TrainDataset(valid_folds, hdf5=hdf_train, meta_features=meta_features,
                                     transform=get_transforms(data='valid'))

        # NOTE(review): shuffle=False means batches arrive in the same order
        # every epoch (the frame was shuffled once above) - confirm intended.
        train_loader = DataLoader(train_dataset,
                                  batch_size=CFG.batch_size,
                                  shuffle=False,
                                  num_workers=CFG.num_workers,
                                  pin_memory=True, drop_last=True)
        valid_loader = DataLoader(valid_dataset,
                                  batch_size=CFG.batch_size,
                                  shuffle=False,
                                  num_workers=CFG.num_workers,
                                  pin_memory=True, drop_last=False)

        # ====================================================
        # model & optimizer
        # ====================================================
        model = CustomResNext(CFG.model_name, num_classes=0,
                              n_meta_features=len(meta_features), pretrained=True)
        model.to(device)
        optimizer = Adam(model.parameters(), lr=CFG.lr,
                         weight_decay=CFG.weight_decay, amsgrad=False)
        scheduler = get_scheduler(optimizer)

        # ====================================================
        # apex
        # ====================================================
        if CFG.apex:
            model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)

        # ====================================================
        # loop
        # ====================================================
        # Class weights live on the same device as the model rather than on a
        # hard-coded CUDA device.
        criterion = nn.CrossEntropyLoss(weight=torch.tensor([0.3, 0.7], device=device))
        best_score = 0
        for epoch in range(CFG.epochs):
            start_time = time.time()

            # train
            avg_loss = train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, device)

            # eval
            avg_val_loss, preds = valid_fn(valid_loader, model, criterion, device)
            valid_labels = valid_folds[CFG.target_col].values

            # ReduceLROnPlateau needs the monitored metric; the cosine
            # schedulers step unconditionally.
            if isinstance(scheduler, ReduceLROnPlateau):
                scheduler.step(avg_val_loss)
            elif isinstance(scheduler, (CosineAnnealingLR, CosineAnnealingWarmRestarts)):
                scheduler.step()

            # scoring: turn the two-class logits into P(target == 1)
            preds = torch.nn.functional.softmax(torch.from_numpy(preds), dim=1).numpy()[:, 1]
            score2 = roc_auc_score(valid_labels, preds)
            comp_score = score_pAUC(pd.DataFrame({'id': valid_folds.index, 'target': valid_labels}),
                                    pd.DataFrame({'id': valid_folds.index, 'prediction': preds}),
                                    row_id_column_name='id')
            # Log each metric under its own name (the full AUC used to be
            # reported under the "pAUC" key).
            wandb.log({"AUC": score2, "pAUC": comp_score})

            elapsed = time.time() - start_time
            LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f} avg_val_loss: {avg_val_loss:.4f} time: {elapsed:.0f}s')
            LOGGER.info(f'Epoch {epoch+1} - AUC: {score2} - pAUC: {comp_score}')

            # Keep only the checkpoint with the best competition metric.
            if comp_score > best_score:
                best_score = comp_score
                LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
                torch.save({'model': model.state_dict(),
                            'preds': preds},
                           OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best.pth')
                oof = pd.DataFrame({'filename': valid_folds['isic_id'].values,
                                    'prediction': preds,
                                    'target': valid_labels})
                oof.to_csv(OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best.csv')
# Notebook cell: evaluating the bare name displays the `train` DataFrame (its rendered output follows).
train
| isic_id | target | patient_id | age_approx | sex | clin_size_long_diam_mm | image_type | tbp_tile_type | tbp_lv_A | tbp_lv_Aext | tbp_lv_B | tbp_lv_Bext | tbp_lv_C | tbp_lv_Cext | tbp_lv_H | tbp_lv_Hext | tbp_lv_L | tbp_lv_Lext | tbp_lv_areaMM2 | tbp_lv_area_perim_ratio | tbp_lv_color_std_mean | tbp_lv_deltaA | tbp_lv_deltaB | tbp_lv_deltaL | tbp_lv_deltaLB | tbp_lv_deltaLBnorm | tbp_lv_eccentricity | tbp_lv_minorAxisMM | tbp_lv_nevi_confidence | tbp_lv_norm_border | tbp_lv_norm_color | tbp_lv_perimeterMM | tbp_lv_radial_color_std_max | tbp_lv_stdL | tbp_lv_stdLExt | tbp_lv_symm_2axis | tbp_lv_symm_2axis_angle | tbp_lv_x | tbp_lv_y | tbp_lv_z | attribution | copyright_license | lesion_id | iddx_full | iddx_1 | iddx_2 | iddx_3 | iddx_4 | iddx_5 | mel_mitotic_index | mel_thick_mm | tbp_lv_dnn_lesion_confidence | Image_File | lesion_size_ratio | lesion_shape_index | hue_contrast | luminance_contrast | lesion_color_difference | border_complexity | color_uniformity | 3d_position_distance | perimeter_to_area_ratio | lesion_visibility_score | symmetry_border_consistency | color_consistency | size_age_interaction | hue_color_std_interaction | lesion_severity_index | shape_complexity_index | color_contrast_index | log_lesion_area | normalized_lesion_size | mean_hue_difference | std_dev_contrast | color_shape_composite_index | 3d_lesion_orientation | overall_color_difference | symmetry_perimeter_interaction | comprehensive_lesion_index | shape_complexity_ratio | color_variability | border_asymmetry | 3d_size_ratio | age_lesion_interaction | color_contrast_complexity | shape_color_composite | relative_lesion_size | border_color_interaction | 3d_radial_distance | 3d_polar_angle | 3d_azimuthal_angle | shape_size_ratio | color_border_complexity | visibility_size_interaction | age_adjusted_lesion_index | nonlinear_color_contrast | shape_location_index | border_complexity_asymmetry_ratio | color_variability_size_interaction | 3d_lesion_composite | nonlinear_shape_color_composite 
| n_images | anatom_site_general_anterior torso | anatom_site_general_head/neck | anatom_site_general_lower extremity | anatom_site_general_posterior torso | anatom_site_general_upper extremity | tbp_lv_location_simple_Head & Neck | tbp_lv_location_simple_Left Arm | tbp_lv_location_simple_Left Leg | tbp_lv_location_simple_Right Arm | tbp_lv_location_simple_Right Leg | tbp_lv_location_simple_Torso Back | tbp_lv_location_simple_Torso Front | tbp_lv_location_simple_Unknown | tbp_lv_location_Head & Neck | tbp_lv_location_Left Arm | tbp_lv_location_Left Arm - Lower | tbp_lv_location_Left Arm - Upper | tbp_lv_location_Left Leg | tbp_lv_location_Left Leg - Lower | tbp_lv_location_Left Leg - Upper | tbp_lv_location_Right Arm | tbp_lv_location_Right Arm - Lower | tbp_lv_location_Right Arm - Upper | tbp_lv_location_Right Leg | tbp_lv_location_Right Leg - Lower | tbp_lv_location_Right Leg - Upper | tbp_lv_location_Torso Back | tbp_lv_location_Torso Back Bottom Third | tbp_lv_location_Torso Back Middle Third | tbp_lv_location_Torso Back Top Third | tbp_lv_location_Torso Front | tbp_lv_location_Torso Front Bottom Half | tbp_lv_location_Torso Front Top Half | tbp_lv_location_Unknown | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ISIC_0015670 | 0 | IP_1235828 | 0.6875 | 0 | 0.074453 | TBP tile: close-up | 3D: white | 0.448560 | 0.549700 | 0.502444 | 0.376069 | 0.549810 | 0.402899 | 0.508448 | 0.267121 | 0.601787 | 0.547974 | 0.008153 | 0.218652 | 0.000000 | 0.387365 | 0.685141 | 0.836741 | 8.360566 | 0.101256 | 0.922243 | 0.070059 | 2.628592e-05 | 0.690918 | 0.000000 | 0.067335 | 0.000000 | 0.102225 | 0.080388 | 0.582086 | 0.485714 | 0.356776 | 0.566559 | 0.408087 | Memorial Sloan Kettering Cancer Center | CC-BY | NaN | Benign | Benign | NaN | NaN | NaN | NaN | NaN | NaN | 97.517282 | /kaggle/input/isic-2024/isic-2024/train-image/... | 0.377312 | 0.306032 | 0.057723 | 7.658253 | 0.153458 | 7.681836 | 0.001929 | 641.525666 | 0.464388 | 0.074536 | 0.426332 | 0.093746 | 0.079887 | 0.000000 | 0.334336 | 7.718231 | 0.527696 | 0.195193 | 0.024389 | 0.404682 | 5.269909 | 9.355549 | 0.796815 | 0.723533 | 0.105880 | 0.215550 | 0.216541 | 0.000000 | 0.540615 | 0.124366 | 0.260433 | 0.499957 | 0.001206 | 0.245694 | 0.000000 | 641.525666 | 0.519977 | 1.860241 | 0.403224 | NaN | 0.062648 | 0.225713 | 0.999961 | 0.143238 | 0.164883 | 0.000000 | 0.114770 | 0.996913 | 0.657389 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 1 | ISIC_0015845 | 0 | IP_8170065 | 0.6875 | 0 | 0.003650 | TBP tile: close-up | 3D: white | 0.674860 | 0.747149 | 0.491697 | 0.391260 | 0.685050 | 0.552365 | 0.384152 | 0.152394 | 0.535710 | 0.465595 | 0.001462 | 0.019278 | 0.000000 | 0.438834 | 0.646551 | 0.867883 | 6.839008 | 0.072258 | 0.646281 | 0.030250 | 1.334303e-09 | 0.162262 | 0.000000 | 0.007756 | 0.000000 | 0.033828 | 0.131598 | 0.252621 | 0.314286 | 0.504132 | 0.893847 | 0.571023 | Memorial Sloan Kettering Cancer Center | CC-BY | IL_6727506 | Benign | Benign | NaN | NaN | NaN | NaN | NaN | NaN | 3.141455 | /kaggle/input/isic-2024/isic-2024/train-image/... | 0.671535 | 0.862602 | 0.090871 | 6.500840 | 0.156607 | 2.402116 | 0.001929 | 1576.723962 | 0.585990 | 0.053190 | 0.058229 | 0.035666 | 0.019856 | 0.000000 | 0.061852 | 2.483847 | 0.555390 | 0.053754 | 0.003870 | 0.265244 | 5.345725 | 4.173668 | 0.750701 | 0.756171 | 0.009881 | 0.018873 | 0.021085 | 0.000000 | 0.259181 | 0.875064 | 0.072891 | 0.499957 | 0.001206 | 0.088039 | 0.000000 | 1576.723962 | 0.486381 | 1.570846 | 0.357300 | NaN | 0.004630 | 0.025995 | 0.999998 | 0.882084 | 0.004149 | 0.000000 | 0.129278 | 0.996913 | 0.479479 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2 | ISIC_0015864 | 0 | IP_6724798 | 0.6875 | 0 | 0.087591 | TBP tile: close-up | 3D: XP | 0.494565 | 0.568489 | 0.703178 | 0.619598 | 0.738108 | 0.606831 | 0.566217 | 0.336171 | 0.596911 | 0.543583 | 0.008491 | 0.175593 | 0.000000 | 0.419247 | 0.734515 | 0.835366 | 9.092376 | 0.119668 | 0.954805 | 0.050841 | 2.959177e-06 | 0.447253 | 0.000000 | 0.063125 | 0.000000 | 0.085311 | 0.052795 | 0.334987 | 0.600000 | 0.603966 | 0.858581 | 0.858501 | Memorial Sloan Kettering Cancer Center | CC-BY | NaN | Benign | Benign | NaN | NaN | NaN | NaN | NaN | NaN | 99.804040 | /kaggle/input/isic-2024/isic-2024/train-image/... | 0.185612 | 0.366843 | 0.075925 | 7.709340 | 0.185190 | 5.160240 | 0.001929 | 1495.442825 | 0.424068 | 0.088089 | 0.174525 | 0.078557 | 0.091026 | 0.000000 | 0.216620 | 5.201588 | 0.589523 | 0.200096 | 0.028197 | 0.478441 | 6.033971 | 8.182175 | 0.737339 | 0.764476 | 0.057649 | 0.172135 | 0.125381 | 0.000000 | 0.572494 | 0.264886 | 0.179413 | 0.499957 | 0.001206 | 0.464085 | 0.000000 | 1495.442825 | 0.446995 | 1.486992 | 0.238232 | NaN | 0.074987 | 0.181627 | 1.000000 | 0.412719 | 0.208498 | 0.000000 | 0.245416 | 0.996913 | 0.810607 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 3 | ISIC_0015902 | 0 | IP_4111386 | 0.7500 | 0 | 0.081022 | TBP tile: close-up | 3D: XP | 0.330121 | 0.460827 | 0.402979 | 0.303669 | 0.407316 | 0.295051 | 0.539680 | 0.308385 | 0.173134 | 0.069389 | 0.016925 | 0.053995 | 0.051695 | 0.345916 | 0.599208 | 0.917270 | 4.783413 | 0.123664 | 0.661665 | 0.121860 | 2.198945e-01 | 0.147329 | 0.177170 | 0.069412 | 0.057844 | 0.057262 | 0.037652 | 0.170317 | 0.742857 | 0.390405 | 0.848437 | 0.572962 | ACEMID MIA | CC-0 | NaN | Benign | Benign | NaN | NaN | NaN | NaN | NaN | NaN | 99.989998 | /kaggle/input/isic-2024/isic-2024/train-image/... | 0.700267 | 0.683752 | 0.075968 | 4.665323 | 0.057300 | 2.185455 | 0.001449 | 1450.239191 | 0.221867 | 0.138479 | 0.038648 | 0.164283 | 0.093760 | 29.026369 | 0.147492 | 2.252618 | 0.510845 | 0.292986 | 0.023673 | 0.446797 | 2.954563 | 5.204448 | 0.766225 | 0.701845 | 0.031795 | 0.073172 | 0.025163 | 0.148822 | 0.269298 | 0.271365 | 0.144972 | 0.510397 | 0.000654 | 0.077898 | 0.040456 | 1450.239191 | 0.485075 | 1.668272 | 0.102462 | 1.691708 | 0.093087 | 0.076888 | 0.999746 | 0.661763 | 0.074702 | 0.099840 | 0.160499 | 0.940654 | 0.664374 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 |
| 4 | ISIC_0024200 | 0 | IP_8313778 | 0.6250 | 0 | 0.063139 | TBP tile: close-up | 3D: white | 0.536985 | 0.632028 | 0.494130 | 0.420931 | 0.595283 | 0.488997 | 0.451561 | 0.230185 | 0.504685 | 0.459331 | 0.005005 | 0.119577 | 0.000000 | 0.402283 | 0.613124 | 0.811955 | 9.148495 | 0.128433 | 0.969901 | 0.036212 | 1.378832e-05 | 0.326168 | 0.000000 | 0.038916 | 0.000000 | 0.105071 | 0.074071 | 0.282586 | 0.114286 | 0.445845 | 0.864265 | 0.512548 | Memorial Sloan Kettering Cancer Center | CC-BY | NaN | Benign | Benign | NaN | NaN | NaN | NaN | NaN | NaN | 70.442510 | /kaggle/input/isic-2024/isic-2024/train-image/... | 0.172334 | 0.476059 | 0.106167 | 8.579430 | 0.169487 | 3.972287 | 0.001929 | 1490.629394 | 0.486255 | 0.094542 | 0.113931 | 0.111047 | 0.063254 | 0.000000 | 0.158069 | 4.022532 | 0.497088 | 0.141714 | 0.023735 | 0.349728 | 5.655868 | 6.738664 | 0.758427 | 0.689332 | 0.032495 | 0.118431 | 0.075373 | 0.000000 | 0.461375 | 0.330097 | 0.125369 | 0.499957 | 0.001206 | 0.486625 | 0.000000 | 1490.629394 | 0.493560 | 1.619334 | 0.229006 | NaN | 0.063896 | 0.132898 | 0.998829 | 0.504179 | 0.153139 | 0.000000 | 0.201530 | 0.996913 | 0.639917 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 401054 | ISIC_9999937 | 0 | IP_1140263 | 0.8125 | 0 | 0.211679 | TBP tile: close-up | 3D: XP | 0.494536 | 0.521126 | 0.515904 | 0.447931 | 0.586077 | 0.443018 | 0.487281 | 0.315836 | 0.385361 | 0.440617 | 0.067308 | 0.092628 | 0.259206 | 0.466726 | 0.617734 | 0.585158 | 17.187750 | 0.381900 | 0.756712 | 0.254170 | 9.936233e-01 | 0.266975 | 0.734813 | 0.176467 | 0.202597 | 0.392395 | 0.061576 | 0.256087 | 0.571429 | 0.622958 | 0.760043 | 0.678136 | Department of Dermatology, Hospital Clínic de ... | CC-BY-NC | IL_9520694 | Benign | Benign | NaN | NaN | NaN | NaN | NaN | NaN | 99.999988 | /kaggle/input/isic-2024/isic-2024/train-image/... | 0.634890 | 0.547280 | 0.209217 | 17.008450 | 0.382087 | 3.390731 | 0.002075 | 1197.740551 | 0.102628 | 0.477913 | 0.088179 | 0.425315 | 0.231305 | 131.016077 | 0.500945 | 3.446777 | 0.526605 | 0.515932 | 0.053883 | 0.419786 | 10.775006 | 6.903743 | 0.730957 | 0.614949 | 0.113153 | 0.181408 | 0.055155 | 0.133119 | 0.399441 | 0.102939 | 0.441927 | 0.544264 | 0.001432 | 0.101932 | 0.056144 | 1197.740551 | 0.464636 | 1.446943 | 0.070953 | 3.757460 | 0.389263 | 0.179022 | 0.999956 | 0.449790 | 0.120393 | 0.154905 | 0.200620 | 0.999038 | 0.605171 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 |
| 401055 | ISIC_9999951 | 0 | IP_5678181 | 0.6875 | 0 | 0.077007 | TBP tile: close-up | 3D: white | 0.443295 | 0.544600 | 0.633924 | 0.556393 | 0.655484 | 0.543159 | 0.570062 | 0.333835 | 0.658611 | 0.610268 | 0.013045 | 0.097717 | 0.081617 | 0.386676 | 0.690975 | 0.828566 | 8.671249 | 0.097520 | 0.730502 | 0.104002 | 2.311562e-03 | 0.409172 | 0.279525 | 0.067668 | 0.091213 | 0.093165 | 0.091352 | 0.441034 | 0.142857 | 0.546435 | 0.831770 | 0.685676 | Memorial Sloan Kettering Cancer Center | CC-BY | NaN | Benign | Benign | NaN | NaN | NaN | NaN | NaN | NaN | 99.999820 | /kaggle/input/isic-2024/isic-2024/train-image/... | 0.606029 | 0.532598 | 0.062324 | 7.962060 | 0.160682 | 4.899972 | 0.001451 | 1399.966447 | 0.289525 | 0.146645 | 0.205950 | 0.077729 | 0.082053 | 48.479191 | 0.332370 | 4.954822 | 0.523040 | 0.255966 | 0.025129 | 0.479404 | 5.443848 | 6.501268 | 0.744708 | 0.721352 | 0.080510 | 0.131420 | 0.084775 | 0.157334 | 0.438239 | 0.271220 | 0.259080 | 0.519117 | 0.001440 | 0.113716 | 0.095893 | 1399.966447 | 0.468425 | 1.533238 | 0.248585 | 3.797567 | 0.093690 | 0.140282 | 0.999935 | 0.517427 | 0.082548 | 0.091546 | 0.198470 | 0.999077 | 0.626251 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 401056 | ISIC_9999960 | 0 | IP_0076153 | 0.7500 | 1 | 0.038321 | TBP tile: close-up | 3D: XP | 0.391100 | 0.465157 | 0.555550 | 0.441107 | 0.564682 | 0.409718 | 0.571694 | 0.356429 | 0.422326 | 0.372602 | 0.006973 | 0.029935 | 0.050055 | 0.408813 | 0.697427 | 0.816060 | 9.267366 | 0.176141 | 0.656763 | 0.072968 | 5.994798e-01 | 0.090863 | 0.166041 | 0.034235 | 0.052872 | 0.082950 | 0.063017 | 0.141688 | 0.228571 | 0.548269 | 0.720228 | 0.302728 | Frazer Institute, The University of Queensland... | CC-BY | IL_9852274 | Benign | Benign | NaN | NaN | NaN | NaN | NaN | NaN | 99.999416 | /kaggle/input/isic-2024/isic-2024/train-image/... | 0.709842 | 0.799960 | 0.106954 | 8.426838 | 0.182489 | 1.627598 | 0.001535 | 1071.999873 | 0.328530 | 0.174127 | 0.023274 | 0.102968 | 0.054538 | 29.819207 | 0.113808 | 1.704226 | 0.575412 | 0.176917 | 0.012250 | 0.493238 | 5.968948 | 4.577102 | 0.742529 | 0.731840 | 0.012848 | 0.048516 | 0.012936 | 0.106503 | 0.192135 | 0.315915 | 0.119857 | 0.517776 | 0.000524 | 0.074654 | 0.021562 | 1071.999873 | 0.531008 | 1.519565 | 0.124007 | 1.334636 | 0.066451 | 0.052331 | 1.000000 | 0.554938 | 0.020968 | 0.040166 | 0.101910 | 0.884804 | 0.480515 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 |
| 401057 | ISIC_9999964 | 0 | IP_5231513 | 0.3125 | 1 | 0.065693 | TBP tile: close-up | 3D: XP | 0.488897 | 0.404430 | 0.529857 | 0.454194 | 0.593638 | 0.395893 | 0.497716 | 0.410203 | 0.541314 | 0.533738 | 0.016757 | 0.039585 | 0.132411 | 0.577571 | 0.634747 | 0.732143 | 11.751160 | 0.200620 | 0.491148 | 0.125537 | 9.931933e-01 | 0.088423 | 0.358397 | 0.065396 | 0.093829 | 0.197278 | 0.053052 | 0.118717 | 0.800000 | 0.496238 | 0.656369 | 0.351561 | University Hospital of Basel | CC-BY-NC | NaN | Benign | Benign | NaN | NaN | NaN | NaN | NaN | NaN | 100.000000 | /kaggle/input/isic-2024/isic-2024/train-image/... | 0.871405 | 0.749631 | 0.387508 | 11.545670 | 0.347967 | 1.583386 | 0.002288 | 880.952401 | 0.212779 | 0.243662 | 0.019739 | 0.185876 | 0.029139 | 68.404855 | 0.204544 | 1.655914 | 0.636265 | 0.291523 | 0.051466 | 0.479725 | 9.953426 | 5.089118 | 0.752484 | 0.769697 | 0.020812 | 0.079411 | 0.013803 | 0.130358 | 0.194449 | 0.187990 | 0.072638 | 0.545422 | 0.000759 | 0.028302 | 0.025674 | 880.952401 | 0.526966 | 1.582034 | 0.084774 | 1.935352 | 0.121353 | 0.133885 | 1.000000 | 0.427143 | 0.052183 | 0.087002 | 0.096675 | 0.965685 | 0.662388 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 |
| 401058 | ISIC_9999967 | 0 | IP_6426047 | 0.5625 | 0 | 0.083942 | TBP tile: close-up | 3D: XP | 0.380451 | 0.444992 | 0.403144 | 0.306060 | 0.434260 | 0.288605 | 0.498154 | 0.324136 | 0.265458 | 0.255607 | 0.019399 | 0.053571 | 0.098350 | 0.417300 | 0.596460 | 0.718983 | 12.157000 | 0.328629 | 0.675802 | 0.127479 | 9.365141e-01 | 0.137519 | 0.310094 | 0.075561 | 0.094688 | 0.161926 | 0.102932 | 0.157707 | 0.257143 | 0.295317 | 0.408429 | 0.653693 | Department of Dermatology, University of Athen... | CC-BY | NaN | Benign | Benign | NaN | NaN | NaN | NaN | NaN | NaN | 99.999960 | /kaggle/input/isic-2024/isic-2024/train-image/... | 0.715202 | 0.685555 | 0.202607 | 12.034760 | 0.250523 | 2.081471 | 0.001684 | 317.333017 | 0.204711 | 0.324955 | 0.034402 | 0.250929 | 0.070913 | 50.854538 | 0.212554 | 2.148780 | 0.537111 | 0.313205 | 0.034120 | 0.430995 | 7.607111 | 5.344518 | 0.917956 | 0.643558 | 0.032020 | 0.089329 | 0.023502 | 0.116116 | 0.258989 | 0.053822 | 0.142874 | 0.523077 | 0.000725 | 0.072866 | 0.030063 | 317.333017 | 0.384716 | 2.620471 | 0.094543 | 1.872560 | 0.176021 | 0.108624 | 0.999986 | 0.129311 | 0.077815 | 0.083394 | 0.030362 | 0.958984 | 0.659003 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
401059 rows × 136 columns
def main():
    """
    Run the stages enabled in CFG: per-fold training and/or test-set inference.
    """

    def get_result(result_df):
        # Score the stored predictions against the ground-truth column and log it.
        predicted = result_df['preds'].values
        truth = result_df[CFG.target_col].values
        LOGGER.info(f'Score: {get_score(truth, predicted):<.5f}')

    if CFG.train:
        # Out-of-fold aggregation is currently disabled, so this frame stays empty.
        oof_df = pd.DataFrame()
        for fold in range(CFG.n_fold):
            if fold not in CFG.trn_fold:
                continue
            train_loop(folds, fold)
        # CV result
        LOGGER.info("========== CV ==========")

    if CFG.inference:
        # NOTE(review): train_loop builds CustomResNext with num_classes and
        # n_meta_features arguments that are not passed here - confirm the
        # defaults yield a model matching the saved checkpoints.
        model = CustomResNext(CFG.model_name, pretrained=False)

        # One checkpoint per trained fold.
        states = []
        for fold in CFG.trn_fold:
            checkpoint_path = OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best.pth'
            states.append(torch.load(checkpoint_path))

        test_dataset = TestDataset(test, transform=get_transforms(data='valid'))
        test_loader = DataLoader(
            test_dataset,
            batch_size=CFG.batch_size,
            shuffle=False,
            num_workers=CFG.num_workers,
            pin_memory=True,
        )
        predictions = inference(model, states, test_loader, device)

        # submission: softmax over the class axis, keep the column for class 1
        print(predictions)
        logits = torch.from_numpy(predictions)
        class_probs = torch.nn.functional.softmax(logits, dim=1).numpy()
        test['label'] = class_probs[:, 1]
        print(test['label'])
        submission = test[['img_name', 'label']]
        submission.to_csv(OUTPUT_DIR+'submission.csv', index=False)
from sklearn.utils import resample
# Start the pipeline only when this module is the entry point, not on import.
if __name__ == "__main__":
    main()
========== fold: 2 training ==========
target 0 10000 1 3000 Name: count, dtype: int64
Epoch: [1][0/650] Data 1.094 (1.094) Elapsed 0m 3s (remain 39m 52s) Loss: 3.5243(3.5243) Grad: 88.2561 LR: 0.000100 Epoch: [1][18/650] Data 0.270 (0.298) Elapsed 0m 15s (remain 8m 41s) Loss: 0.7758(1.8412) Grad: 68.7465 LR: 0.000100 Epoch: [1][36/650] Data 0.248 (0.281) Elapsed 0m 28s (remain 7m 44s) Loss: 1.6655(1.8824) Grad: 53.8497 LR: 0.000100 Epoch: [1][54/650] Data 0.271 (0.276) Elapsed 0m 40s (remain 7m 16s) Loss: 2.0998(1.7617) Grad: 70.7080 LR: 0.000100 Epoch: [1][72/650] Data 0.272 (0.274) Elapsed 0m 52s (remain 6m 55s) Loss: 1.7836(1.7374) Grad: 56.8125 LR: 0.000100 Epoch: [1][90/650] Data 0.272 (0.273) Elapsed 1m 4s (remain 6m 38s) Loss: 3.0666(1.6971) Grad: 69.2511 LR: 0.000100 Epoch: [1][108/650] Data 0.269 (0.272) Elapsed 1m 17s (remain 6m 23s) Loss: 1.5897(1.6144) Grad: 52.9566 LR: 0.000100 Epoch: [1][126/650] Data 0.268 (0.271) Elapsed 1m 29s (remain 6m 8s) Loss: 0.8502(1.5542) Grad: 28.3672 LR: 0.000100 Epoch: [1][144/650] Data 0.259 (0.271) Elapsed 1m 41s (remain 5m 54s) Loss: 1.2228(1.5023) Grad: 41.7399 LR: 0.000100 Epoch: [1][162/650] Data 0.272 (0.271) Elapsed 1m 54s (remain 5m 40s) Loss: 0.9951(1.4584) Grad: 32.2839 LR: 0.000100 Epoch: [1][180/650] Data 0.272 (0.270) Elapsed 2m 6s (remain 5m 27s) Loss: 1.0090(1.4137) Grad: 35.8288 LR: 0.000100 Epoch: [1][198/650] Data 0.271 (0.270) Elapsed 2m 18s (remain 5m 14s) Loss: 0.5395(1.3594) Grad: 29.4353 LR: 0.000100 Epoch: [1][216/650] Data 0.271 (0.269) Elapsed 2m 31s (remain 5m 1s) Loss: 0.8337(1.3185) Grad: 22.6757 LR: 0.000100 Epoch: [1][234/650] Data 0.271 (0.269) Elapsed 2m 43s (remain 4m 48s) Loss: 0.9160(1.2864) Grad: 29.3670 LR: 0.000100 Epoch: [1][252/650] Data 0.264 (0.268) Elapsed 2m 55s (remain 4m 35s) Loss: 1.0782(1.2481) Grad: 32.6038 LR: 0.000100 Epoch: [1][270/650] Data 0.260 (0.268) Elapsed 3m 8s (remain 4m 22s) Loss: 0.2894(1.2132) Grad: 9.0277 LR: 0.000100 Epoch: [1][288/650] Data 0.272 (0.268) Elapsed 3m 20s (remain 4m 10s) Loss: 0.3973(1.1846) Grad: 15.0366 LR: 0.000100 Epoch: 
[1][306/650] Data 0.270 (0.268) Elapsed 3m 32s (remain 3m 57s) Loss: 1.2028(1.1562) Grad: 28.0102 LR: 0.000100 Epoch: [1][324/650] Data 0.271 (0.268) Elapsed 3m 44s (remain 3m 44s) Loss: 0.2811(1.1305) Grad: 10.3663 LR: 0.000100 Epoch: [1][342/650] Data 0.271 (0.268) Elapsed 3m 57s (remain 3m 32s) Loss: 1.0315(1.1047) Grad: 28.1144 LR: 0.000100 Epoch: [1][360/650] Data 0.267 (0.267) Elapsed 4m 9s (remain 3m 19s) Loss: 0.2589(1.0819) Grad: 10.1003 LR: 0.000100 Epoch: [1][378/650] Data 0.263 (0.267) Elapsed 4m 21s (remain 3m 7s) Loss: 0.9474(1.0648) Grad: 27.8960 LR: 0.000100 Epoch: [1][396/650] Data 0.253 (0.267) Elapsed 4m 34s (remain 2m 54s) Loss: 0.7659(1.0464) Grad: 25.9070 LR: 0.000100 Epoch: [1][414/650] Data 0.271 (0.267) Elapsed 4m 46s (remain 2m 42s) Loss: 0.9235(1.0263) Grad: 23.6163 LR: 0.000100 Epoch: [1][432/650] Data 0.259 (0.267) Elapsed 4m 58s (remain 2m 29s) Loss: 0.8599(1.0089) Grad: 21.5964 LR: 0.000100 Epoch: [1][450/650] Data 0.249 (0.267) Elapsed 5m 11s (remain 2m 17s) Loss: 0.6775(0.9899) Grad: 25.0101 LR: 0.000100 Epoch: [1][468/650] Data 0.260 (0.267) Elapsed 5m 23s (remain 2m 4s) Loss: 0.5397(0.9707) Grad: 20.5652 LR: 0.000100 Epoch: [1][486/650] Data 0.266 (0.267) Elapsed 5m 35s (remain 1m 52s) Loss: 0.7107(0.9590) Grad: 22.9121 LR: 0.000100 Epoch: [1][504/650] Data 0.271 (0.267) Elapsed 5m 47s (remain 1m 39s) Loss: 0.5597(0.9454) Grad: 15.1320 LR: 0.000100 Epoch: [1][522/650] Data 0.263 (0.267) Elapsed 6m 0s (remain 1m 27s) Loss: 0.5222(0.9337) Grad: 20.3184 LR: 0.000100 Epoch: [1][540/650] Data 0.272 (0.267) Elapsed 6m 12s (remain 1m 15s) Loss: 0.3124(0.9211) Grad: 12.5482 LR: 0.000100 Epoch: [1][558/650] Data 0.255 (0.267) Elapsed 6m 24s (remain 1m 2s) Loss: 0.3745(0.9116) Grad: 12.6663 LR: 0.000100 Epoch: [1][576/650] Data 0.267 (0.267) Elapsed 6m 37s (remain 0m 50s) Loss: 0.9547(0.9022) Grad: 22.3890 LR: 0.000100 Epoch: [1][594/650] Data 0.273 (0.267) Elapsed 6m 49s (remain 0m 37s) Loss: 0.6682(0.8915) Grad: 20.6627 LR: 0.000100 
Epoch: [1][612/650] Data 0.271 (0.267) Elapsed 7m 1s (remain 0m 25s) Loss: 0.5520(0.8817) Grad: 13.6343 LR: 0.000100 Epoch: [1][630/650] Data 0.272 (0.267) Elapsed 7m 14s (remain 0m 13s) Loss: 1.0143(0.8717) Grad: 21.2005 LR: 0.000100 Epoch: [1][648/650] Data 0.273 (0.267) Elapsed 7m 26s (remain 0m 0s) Loss: 0.7269(0.8607) Grad: 17.1867 LR: 0.000100 Epoch: [1][649/650] Data 0.273 (0.267) Elapsed 7m 27s (remain 0m 0s) Loss: 0.5090(0.8602) Grad: 16.8420 LR: 0.000100 EVAL: [0/3883] Data 0.499 (0.499) Elapsed 0m 0s (remain 44m 33s) Loss: 0.4381(0.4381) EVAL: [18/3883] Data 0.001 (0.028) Elapsed 0m 3s (remain 13m 2s) Loss: 0.5002(0.3744) EVAL: [36/3883] Data 0.001 (0.015) Elapsed 0m 6s (remain 12m 7s) Loss: 0.3218(0.3593) EVAL: [54/3883] Data 0.001 (0.010) Elapsed 0m 10s (remain 11m 46s) Loss: 0.1657(0.3550) EVAL: [72/3883] Data 0.001 (0.008) Elapsed 0m 13s (remain 11m 33s) Loss: 0.3832(0.3693) EVAL: [90/3883] Data 0.001 (0.006) Elapsed 0m 16s (remain 11m 25s) Loss: 0.4993(0.3707) EVAL: [108/3883] Data 0.001 (0.006) Elapsed 0m 19s (remain 11m 18s) Loss: 0.8354(0.3842) EVAL: [126/3883] Data 0.001 (0.005) Elapsed 0m 22s (remain 11m 12s) Loss: 0.5466(0.3942) EVAL: [144/3883] Data 0.001 (0.004) Elapsed 0m 25s (remain 11m 7s) Loss: 0.5858(0.3973) EVAL: [162/3883] Data 0.001 (0.004) Elapsed 0m 29s (remain 11m 2s) Loss: 0.2338(0.4001) EVAL: [180/3883] Data 0.001 (0.004) Elapsed 0m 32s (remain 10m 58s) Loss: 0.1705(0.3901) EVAL: [198/3883] Data 0.001 (0.003) Elapsed 0m 35s (remain 10m 54s) Loss: 0.3506(0.3907) EVAL: [216/3883] Data 0.001 (0.003) Elapsed 0m 38s (remain 10m 50s) Loss: 0.3607(0.3904) EVAL: [234/3883] Data 0.001 (0.003) Elapsed 0m 41s (remain 10m 46s) Loss: 0.3918(0.3891) EVAL: [252/3883] Data 0.001 (0.003) Elapsed 0m 44s (remain 10m 42s) Loss: 0.4226(0.3846) EVAL: [270/3883] Data 0.001 (0.003) Elapsed 0m 47s (remain 10m 38s) Loss: 0.3249(0.3875) EVAL: [288/3883] Data 0.001 (0.003) Elapsed 0m 51s (remain 10m 35s) Loss: 0.5338(0.3909) EVAL: [306/3883] Data 0.001 
(0.003) Elapsed 0m 54s (remain 10m 31s) Loss: 0.2750(0.3903) EVAL: [324/3883] Data 0.001 (0.002) Elapsed 0m 57s (remain 10m 28s) Loss: 0.2308(0.3896) EVAL: [342/3883] Data 0.001 (0.002) Elapsed 1m 0s (remain 10m 24s) Loss: 0.1096(0.3913) EVAL: [360/3883] Data 0.001 (0.002) Elapsed 1m 3s (remain 10m 21s) Loss: 0.3877(0.3913) EVAL: [378/3883] Data 0.001 (0.002) Elapsed 1m 6s (remain 10m 17s) Loss: 0.4232(0.3931) EVAL: [396/3883] Data 0.001 (0.002) Elapsed 1m 9s (remain 10m 14s) Loss: 0.5665(0.3918) EVAL: [414/3883] Data 0.001 (0.002) Elapsed 1m 13s (remain 10m 11s) Loss: 0.4080(0.3907) EVAL: [432/3883] Data 0.001 (0.002) Elapsed 1m 16s (remain 10m 7s) Loss: 0.3875(0.3906) EVAL: [450/3883] Data 0.001 (0.002) Elapsed 1m 19s (remain 10m 4s) Loss: 0.3887(0.3906) EVAL: [468/3883] Data 0.001 (0.002) Elapsed 1m 22s (remain 10m 1s) Loss: 0.2919(0.3930) EVAL: [486/3883] Data 0.001 (0.002) Elapsed 1m 25s (remain 9m 57s) Loss: 0.4803(0.3924) EVAL: [504/3883] Data 0.001 (0.002) Elapsed 1m 28s (remain 9m 54s) Loss: 0.2495(0.3917) EVAL: [522/3883] Data 0.001 (0.002) Elapsed 1m 32s (remain 9m 51s) Loss: 0.6714(0.3903) EVAL: [540/3883] Data 0.001 (0.002) Elapsed 1m 35s (remain 9m 47s) Loss: 0.2734(0.3896) EVAL: [558/3883] Data 0.001 (0.002) Elapsed 1m 38s (remain 9m 44s) Loss: 0.5121(0.3911) EVAL: [576/3883] Data 0.001 (0.002) Elapsed 1m 41s (remain 9m 41s) Loss: 0.2138(0.3891) EVAL: [594/3883] Data 0.001 (0.002) Elapsed 1m 44s (remain 9m 38s) Loss: 0.3425(0.3872) EVAL: [612/3883] Data 0.001 (0.002) Elapsed 1m 47s (remain 9m 34s) Loss: 0.3249(0.3870) EVAL: [630/3883] Data 0.001 (0.002) Elapsed 1m 50s (remain 9m 31s) Loss: 0.3349(0.3875) EVAL: [648/3883] Data 0.001 (0.002) Elapsed 1m 54s (remain 9m 28s) Loss: 0.1616(0.3871) EVAL: [666/3883] Data 0.001 (0.002) Elapsed 1m 57s (remain 9m 25s) Loss: 0.2064(0.3864) EVAL: [684/3883] Data 0.001 (0.002) Elapsed 2m 0s (remain 9m 21s) Loss: 0.4660(0.3874) EVAL: [702/3883] Data 0.001 (0.002) Elapsed 2m 3s (remain 9m 18s) Loss: 0.4269(0.3852) 
EVAL: [720/3883] Data 0.001 (0.002) Elapsed 2m 6s (remain 9m 15s) Loss: 0.4283(0.3847) EVAL: [738/3883] Data 0.001 (0.002) Elapsed 2m 9s (remain 9m 12s) Loss: 0.2061(0.3851) EVAL: [756/3883] Data 0.001 (0.002) Elapsed 2m 12s (remain 9m 9s) Loss: 0.3393(0.3851) EVAL: [774/3883] Data 0.001 (0.002) Elapsed 2m 16s (remain 9m 5s) Loss: 0.3480(0.3857) EVAL: [792/3883] Data 0.001 (0.002) Elapsed 2m 19s (remain 9m 2s) Loss: 0.3959(0.3871) EVAL: [810/3883] Data 0.001 (0.002) Elapsed 2m 22s (remain 8m 59s) Loss: 0.2176(0.3871) EVAL: [828/3883] Data 0.001 (0.002) Elapsed 2m 25s (remain 8m 56s) Loss: 0.2506(0.3876) EVAL: [846/3883] Data 0.001 (0.001) Elapsed 2m 28s (remain 8m 52s) Loss: 0.2162(0.3885) EVAL: [864/3883] Data 0.001 (0.001) Elapsed 2m 31s (remain 8m 49s) Loss: 0.5012(0.3899) EVAL: [882/3883] Data 0.001 (0.001) Elapsed 2m 34s (remain 8m 46s) Loss: 0.0853(0.3893) EVAL: [900/3883] Data 0.001 (0.001) Elapsed 2m 38s (remain 8m 43s) Loss: 0.5533(0.3895) EVAL: [918/3883] Data 0.001 (0.001) Elapsed 2m 41s (remain 8m 40s) Loss: 0.5037(0.3900) EVAL: [936/3883] Data 0.001 (0.001) Elapsed 2m 44s (remain 8m 36s) Loss: 0.5038(0.3896) EVAL: [954/3883] Data 0.001 (0.001) Elapsed 2m 47s (remain 8m 33s) Loss: 0.3163(0.3883) EVAL: [972/3883] Data 0.001 (0.001) Elapsed 2m 50s (remain 8m 30s) Loss: 0.1257(0.3885) EVAL: [990/3883] Data 0.001 (0.001) Elapsed 2m 53s (remain 8m 27s) Loss: 0.4717(0.3883) EVAL: [1008/3883] Data 0.001 (0.001) Elapsed 2m 57s (remain 8m 24s) Loss: 0.4044(0.3884) EVAL: [1026/3883] Data 0.001 (0.001) Elapsed 3m 0s (remain 8m 21s) Loss: 0.1554(0.3873) EVAL: [1044/3883] Data 0.001 (0.001) Elapsed 3m 3s (remain 8m 17s) Loss: 0.3022(0.3873) EVAL: [1062/3883] Data 0.001 (0.001) Elapsed 3m 6s (remain 8m 14s) Loss: 0.3534(0.3874) EVAL: [1080/3883] Data 0.001 (0.001) Elapsed 3m 9s (remain 8m 11s) Loss: 0.3599(0.3872) EVAL: [1098/3883] Data 0.001 (0.001) Elapsed 3m 12s (remain 8m 8s) Loss: 0.2153(0.3865) EVAL: [1116/3883] Data 0.001 (0.001) Elapsed 3m 15s (remain 8m 5s) 
Loss: 0.5516(0.3869) EVAL: [1134/3883] Data 0.001 (0.001) Elapsed 3m 19s (remain 8m 1s) Loss: 0.1750(0.3862) EVAL: [1152/3883] Data 0.001 (0.001) Elapsed 3m 22s (remain 7m 58s) Loss: 0.4854(0.3869) EVAL: [1170/3883] Data 0.001 (0.001) Elapsed 3m 25s (remain 7m 55s) Loss: 0.4427(0.3860) EVAL: [1188/3883] Data 0.001 (0.001) Elapsed 3m 28s (remain 7m 52s) Loss: 0.3641(0.3866) EVAL: [1206/3883] Data 0.001 (0.001) Elapsed 3m 31s (remain 7m 49s) Loss: 0.2484(0.3866) EVAL: [1224/3883] Data 0.001 (0.001) Elapsed 3m 34s (remain 7m 46s) Loss: 0.3988(0.3864) EVAL: [1242/3883] Data 0.001 (0.001) Elapsed 3m 37s (remain 7m 42s) Loss: 0.5588(0.3867) EVAL: [1260/3883] Data 0.001 (0.001) Elapsed 3m 41s (remain 7m 39s) Loss: 0.6628(0.3872) EVAL: [1278/3883] Data 0.001 (0.001) Elapsed 3m 44s (remain 7m 36s) Loss: 0.5253(0.3873) EVAL: [1296/3883] Data 0.001 (0.001) Elapsed 3m 47s (remain 7m 33s) Loss: 0.2245(0.3872) EVAL: [1314/3883] Data 0.001 (0.001) Elapsed 3m 50s (remain 7m 30s) Loss: 0.4149(0.3879) EVAL: [1332/3883] Data 0.001 (0.001) Elapsed 3m 53s (remain 7m 27s) Loss: 0.2844(0.3872) EVAL: [1350/3883] Data 0.001 (0.001) Elapsed 3m 56s (remain 7m 23s) Loss: 0.2141(0.3870) EVAL: [1368/3883] Data 0.001 (0.001) Elapsed 3m 59s (remain 7m 20s) Loss: 0.8111(0.3883) EVAL: [1386/3883] Data 0.001 (0.001) Elapsed 4m 3s (remain 7m 17s) Loss: 0.2058(0.3880) EVAL: [1404/3883] Data 0.001 (0.001) Elapsed 4m 6s (remain 7m 14s) Loss: 0.4379(0.3881) EVAL: [1422/3883] Data 0.001 (0.001) Elapsed 4m 9s (remain 7m 11s) Loss: 0.4210(0.3881) EVAL: [1440/3883] Data 0.001 (0.001) Elapsed 4m 12s (remain 7m 8s) Loss: 0.3823(0.3874) EVAL: [1458/3883] Data 0.001 (0.001) Elapsed 4m 15s (remain 7m 4s) Loss: 0.2784(0.3876) EVAL: [1476/3883] Data 0.001 (0.001) Elapsed 4m 18s (remain 7m 1s) Loss: 0.3118(0.3871) EVAL: [1494/3883] Data 0.001 (0.001) Elapsed 4m 22s (remain 6m 58s) Loss: 0.6044(0.3865) EVAL: [1512/3883] Data 0.001 (0.001) Elapsed 4m 25s (remain 6m 55s) Loss: 0.6427(0.3862) EVAL: [1530/3883] Data 
0.003 (0.001) Elapsed 4m 28s (remain 6m 52s) Loss: 0.4457(0.3859) EVAL: [1548/3883] Data 0.001 (0.001) Elapsed 4m 31s (remain 6m 49s) Loss: 0.4420(0.3854) EVAL: [1566/3883] Data 0.001 (0.001) Elapsed 4m 34s (remain 6m 45s) Loss: 0.3235(0.3847) EVAL: [1584/3883] Data 0.001 (0.001) Elapsed 4m 37s (remain 6m 42s) Loss: 0.2181(0.3847) EVAL: [1602/3883] Data 0.001 (0.001) Elapsed 4m 40s (remain 6m 39s) Loss: 0.6755(0.3852) EVAL: [1620/3883] Data 0.001 (0.001) Elapsed 4m 44s (remain 6m 36s) Loss: 0.8215(0.3863) EVAL: [1638/3883] Data 0.001 (0.001) Elapsed 4m 47s (remain 6m 33s) Loss: 0.2758(0.3866) EVAL: [1656/3883] Data 0.001 (0.001) Elapsed 4m 50s (remain 6m 30s) Loss: 0.2801(0.3872) EVAL: [1674/3883] Data 0.001 (0.001) Elapsed 4m 53s (remain 6m 26s) Loss: 0.3649(0.3876) EVAL: [1692/3883] Data 0.001 (0.001) Elapsed 4m 56s (remain 6m 23s) Loss: 0.2677(0.3877) EVAL: [1710/3883] Data 0.001 (0.001) Elapsed 4m 59s (remain 6m 20s) Loss: 0.5662(0.3884) EVAL: [1728/3883] Data 0.001 (0.001) Elapsed 5m 2s (remain 6m 17s) Loss: 0.3305(0.3888) EVAL: [1746/3883] Data 0.001 (0.001) Elapsed 5m 6s (remain 6m 14s) Loss: 0.4049(0.3894) EVAL: [1764/3883] Data 0.001 (0.001) Elapsed 5m 9s (remain 6m 11s) Loss: 0.2036(0.3895) EVAL: [1782/3883] Data 0.001 (0.001) Elapsed 5m 12s (remain 6m 7s) Loss: 0.3680(0.3895) EVAL: [1800/3883] Data 0.001 (0.001) Elapsed 5m 15s (remain 6m 4s) Loss: 0.5654(0.3891) EVAL: [1818/3883] Data 0.001 (0.001) Elapsed 5m 18s (remain 6m 1s) Loss: 0.2664(0.3889) EVAL: [1836/3883] Data 0.001 (0.001) Elapsed 5m 21s (remain 5m 58s) Loss: 0.3189(0.3892) EVAL: [1854/3883] Data 0.001 (0.001) Elapsed 5m 24s (remain 5m 55s) Loss: 0.1155(0.3888) EVAL: [1872/3883] Data 0.001 (0.001) Elapsed 5m 28s (remain 5m 52s) Loss: 0.6842(0.3889) EVAL: [1890/3883] Data 0.001 (0.001) Elapsed 5m 31s (remain 5m 48s) Loss: 0.2787(0.3890) EVAL: [1908/3883] Data 0.001 (0.001) Elapsed 5m 34s (remain 5m 45s) Loss: 0.4071(0.3888) EVAL: [1926/3883] Data 0.001 (0.001) Elapsed 5m 37s (remain 5m 42s) 
Loss: 0.1842(0.3884) EVAL: [1944/3883] Data 0.001 (0.001) Elapsed 5m 40s (remain 5m 39s) Loss: 0.3079(0.3887) EVAL: [1962/3883] Data 0.001 (0.001) Elapsed 5m 43s (remain 5m 36s) Loss: 0.2134(0.3894) EVAL: [1980/3883] Data 0.001 (0.001) Elapsed 5m 47s (remain 5m 33s) Loss: 0.4866(0.3896) EVAL: [1998/3883] Data 0.001 (0.001) Elapsed 5m 50s (remain 5m 30s) Loss: 0.2357(0.3897) EVAL: [2016/3883] Data 0.001 (0.001) Elapsed 5m 53s (remain 5m 26s) Loss: 0.3194(0.3898) EVAL: [2034/3883] Data 0.001 (0.001) Elapsed 5m 56s (remain 5m 23s) Loss: 0.2397(0.3896) EVAL: [2052/3883] Data 0.001 (0.001) Elapsed 5m 59s (remain 5m 20s) Loss: 0.6325(0.3897) EVAL: [2070/3883] Data 0.001 (0.001) Elapsed 6m 2s (remain 5m 17s) Loss: 0.4882(0.3903) EVAL: [2088/3883] Data 0.001 (0.001) Elapsed 6m 5s (remain 5m 14s) Loss: 0.1250(0.3901) EVAL: [2106/3883] Data 0.001 (0.001) Elapsed 6m 9s (remain 5m 11s) Loss: 0.5751(0.3899) EVAL: [2124/3883] Data 0.001 (0.001) Elapsed 6m 12s (remain 5m 7s) Loss: 0.2849(0.3893) EVAL: [2142/3883] Data 0.001 (0.001) Elapsed 6m 15s (remain 5m 4s) Loss: 0.3084(0.3892) EVAL: [2160/3883] Data 0.001 (0.001) Elapsed 6m 18s (remain 5m 1s) Loss: 0.5098(0.3887) EVAL: [2178/3883] Data 0.001 (0.001) Elapsed 6m 21s (remain 4m 58s) Loss: 0.1769(0.3879) EVAL: [2196/3883] Data 0.001 (0.001) Elapsed 6m 24s (remain 4m 55s) Loss: 0.3323(0.3880) EVAL: [2214/3883] Data 0.001 (0.001) Elapsed 6m 27s (remain 4m 52s) Loss: 0.5231(0.3881) EVAL: [2232/3883] Data 0.001 (0.001) Elapsed 6m 31s (remain 4m 48s) Loss: 0.5544(0.3880) EVAL: [2250/3883] Data 0.001 (0.001) Elapsed 6m 34s (remain 4m 45s) Loss: 0.7136(0.3882) EVAL: [2268/3883] Data 0.001 (0.001) Elapsed 6m 37s (remain 4m 42s) Loss: 0.2869(0.3881) EVAL: [2286/3883] Data 0.001 (0.001) Elapsed 6m 40s (remain 4m 39s) Loss: 0.7316(0.3878) EVAL: [2304/3883] Data 0.001 (0.001) Elapsed 6m 43s (remain 4m 36s) Loss: 0.4014(0.3878) EVAL: [2322/3883] Data 0.001 (0.001) Elapsed 6m 46s (remain 4m 33s) Loss: 0.2719(0.3874) EVAL: [2340/3883] Data 
0.001 (0.001) Elapsed 6m 50s (remain 4m 30s) Loss: 0.4887(0.3874) EVAL: [2358/3883] Data 0.001 (0.001) Elapsed 6m 53s (remain 4m 26s) Loss: 0.5208(0.3872) EVAL: [2376/3883] Data 0.001 (0.001) Elapsed 6m 56s (remain 4m 23s) Loss: 0.5303(0.3870) EVAL: [2394/3883] Data 0.001 (0.001) Elapsed 6m 59s (remain 4m 20s) Loss: 0.6512(0.3869) EVAL: [2412/3883] Data 0.001 (0.001) Elapsed 7m 2s (remain 4m 17s) Loss: 0.4824(0.3869) EVAL: [2430/3883] Data 0.001 (0.001) Elapsed 7m 5s (remain 4m 14s) Loss: 0.2176(0.3864) EVAL: [2448/3883] Data 0.002 (0.001) Elapsed 7m 8s (remain 4m 11s) Loss: 0.3585(0.3860) EVAL: [2466/3883] Data 0.001 (0.001) Elapsed 7m 12s (remain 4m 7s) Loss: 0.2684(0.3861) EVAL: [2484/3883] Data 0.001 (0.001) Elapsed 7m 15s (remain 4m 4s) Loss: 0.5313(0.3866) EVAL: [2502/3883] Data 0.001 (0.001) Elapsed 7m 18s (remain 4m 1s) Loss: 0.2752(0.3871) EVAL: [2520/3883] Data 0.001 (0.001) Elapsed 7m 21s (remain 3m 58s) Loss: 0.3823(0.3871) EVAL: [2538/3883] Data 0.001 (0.001) Elapsed 7m 24s (remain 3m 55s) Loss: 0.4412(0.3875) EVAL: [2556/3883] Data 0.001 (0.001) Elapsed 7m 27s (remain 3m 52s) Loss: 0.7861(0.3874) EVAL: [2574/3883] Data 0.001 (0.001) Elapsed 7m 30s (remain 3m 49s) Loss: 0.6420(0.3874) EVAL: [2592/3883] Data 0.001 (0.001) Elapsed 7m 34s (remain 3m 45s) Loss: 0.2034(0.3877) EVAL: [2610/3883] Data 0.001 (0.001) Elapsed 7m 37s (remain 3m 42s) Loss: 0.3482(0.3877) EVAL: [2628/3883] Data 0.001 (0.001) Elapsed 7m 40s (remain 3m 39s) Loss: 0.1686(0.3876) EVAL: [2646/3883] Data 0.001 (0.001) Elapsed 7m 43s (remain 3m 36s) Loss: 0.4539(0.3876) EVAL: [2664/3883] Data 0.001 (0.001) Elapsed 7m 46s (remain 3m 33s) Loss: 0.3845(0.3875) EVAL: [2682/3883] Data 0.001 (0.001) Elapsed 7m 49s (remain 3m 30s) Loss: 0.1901(0.3870) EVAL: [2700/3883] Data 0.001 (0.001) Elapsed 7m 52s (remain 3m 26s) Loss: 0.4236(0.3868) EVAL: [2718/3883] Data 0.001 (0.001) Elapsed 7m 56s (remain 3m 23s) Loss: 0.4896(0.3867) EVAL: [2736/3883] Data 0.001 (0.001) Elapsed 7m 59s (remain 3m 20s) 
Loss: 0.1220(0.3867) EVAL: [2754/3883] Data 0.001 (0.001) Elapsed 8m 2s (remain 3m 17s) Loss: 0.3902(0.3874) EVAL: [2772/3883] Data 0.001 (0.001) Elapsed 8m 5s (remain 3m 14s) Loss: 0.5802(0.3876) EVAL: [2790/3883] Data 0.001 (0.001) Elapsed 8m 8s (remain 3m 11s) Loss: 0.2578(0.3873) EVAL: [2808/3883] Data 0.001 (0.001) Elapsed 8m 11s (remain 3m 8s) Loss: 0.5002(0.3873) EVAL: [2826/3883] Data 0.001 (0.001) Elapsed 8m 15s (remain 3m 4s) Loss: 0.4060(0.3871) EVAL: [2844/3883] Data 0.001 (0.001) Elapsed 8m 18s (remain 3m 1s) Loss: 0.6644(0.3872) EVAL: [2862/3883] Data 0.001 (0.001) Elapsed 8m 21s (remain 2m 58s) Loss: 0.6403(0.3871) EVAL: [2880/3883] Data 0.001 (0.001) Elapsed 8m 24s (remain 2m 55s) Loss: 0.2028(0.3867) EVAL: [2898/3883] Data 0.001 (0.001) Elapsed 8m 27s (remain 2m 52s) Loss: 0.3228(0.3867) EVAL: [2916/3883] Data 0.001 (0.001) Elapsed 8m 30s (remain 2m 49s) Loss: 0.4995(0.3866) EVAL: [2934/3883] Data 0.001 (0.001) Elapsed 8m 33s (remain 2m 45s) Loss: 0.6318(0.3866) EVAL: [2952/3883] Data 0.001 (0.001) Elapsed 8m 37s (remain 2m 42s) Loss: 0.1441(0.3867) EVAL: [2970/3883] Data 0.001 (0.001) Elapsed 8m 40s (remain 2m 39s) Loss: 0.3390(0.3868) EVAL: [2988/3883] Data 0.001 (0.001) Elapsed 8m 43s (remain 2m 36s) Loss: 0.3221(0.3869) EVAL: [3006/3883] Data 0.001 (0.001) Elapsed 8m 46s (remain 2m 33s) Loss: 0.5247(0.3867) EVAL: [3024/3883] Data 0.001 (0.001) Elapsed 8m 49s (remain 2m 30s) Loss: 0.4454(0.3867) EVAL: [3042/3883] Data 0.001 (0.001) Elapsed 8m 52s (remain 2m 27s) Loss: 0.3169(0.3865) EVAL: [3060/3883] Data 0.001 (0.001) Elapsed 8m 55s (remain 2m 23s) Loss: 0.3938(0.3861) EVAL: [3078/3883] Data 0.001 (0.001) Elapsed 8m 59s (remain 2m 20s) Loss: 0.4561(0.3861) EVAL: [3096/3883] Data 0.001 (0.001) Elapsed 9m 2s (remain 2m 17s) Loss: 0.8708(0.3863) EVAL: [3114/3883] Data 0.001 (0.001) Elapsed 9m 5s (remain 2m 14s) Loss: 0.6743(0.3865) EVAL: [3132/3883] Data 0.001 (0.001) Elapsed 9m 8s (remain 2m 11s) Loss: 0.2955(0.3864) EVAL: [3150/3883] Data 0.001 
(0.001) Elapsed 9m 11s (remain 2m 8s) Loss: 0.4218(0.3863) EVAL: [3168/3883] Data 0.001 (0.001) Elapsed 9m 14s (remain 2m 5s) Loss: 0.6962(0.3863) EVAL: [3186/3883] Data 0.001 (0.001) Elapsed 9m 17s (remain 2m 1s) Loss: 0.1438(0.3860) EVAL: [3204/3883] Data 0.001 (0.001) Elapsed 9m 21s (remain 1m 58s) Loss: 0.5117(0.3861) EVAL: [3222/3883] Data 0.001 (0.001) Elapsed 9m 24s (remain 1m 55s) Loss: 0.2541(0.3860) EVAL: [3240/3883] Data 0.001 (0.001) Elapsed 9m 27s (remain 1m 52s) Loss: 0.8336(0.3860) EVAL: [3258/3883] Data 0.001 (0.001) Elapsed 9m 30s (remain 1m 49s) Loss: 0.2186(0.3860) EVAL: [3276/3883] Data 0.001 (0.001) Elapsed 9m 33s (remain 1m 46s) Loss: 0.5246(0.3859) EVAL: [3294/3883] Data 0.001 (0.001) Elapsed 9m 36s (remain 1m 42s) Loss: 0.3215(0.3861) EVAL: [3312/3883] Data 0.001 (0.001) Elapsed 9m 40s (remain 1m 39s) Loss: 0.0979(0.3861) EVAL: [3330/3883] Data 0.001 (0.001) Elapsed 9m 43s (remain 1m 36s) Loss: 0.3324(0.3859) EVAL: [3348/3883] Data 0.001 (0.001) Elapsed 9m 46s (remain 1m 33s) Loss: 0.2753(0.3860) EVAL: [3366/3883] Data 0.001 (0.001) Elapsed 9m 49s (remain 1m 30s) Loss: 0.4307(0.3858) EVAL: [3384/3883] Data 0.001 (0.001) Elapsed 9m 52s (remain 1m 27s) Loss: 0.4982(0.3860) EVAL: [3402/3883] Data 0.001 (0.001) Elapsed 9m 55s (remain 1m 24s) Loss: 0.3686(0.3862) EVAL: [3420/3883] Data 0.001 (0.001) Elapsed 9m 58s (remain 1m 20s) Loss: 0.6403(0.3859) EVAL: [3438/3883] Data 0.001 (0.001) Elapsed 10m 2s (remain 1m 17s) Loss: 0.2733(0.3861) EVAL: [3456/3883] Data 0.001 (0.001) Elapsed 10m 5s (remain 1m 14s) Loss: 0.5780(0.3862) EVAL: [3474/3883] Data 0.001 (0.001) Elapsed 10m 8s (remain 1m 11s) Loss: 0.1601(0.3863) EVAL: [3492/3883] Data 0.001 (0.001) Elapsed 10m 11s (remain 1m 8s) Loss: 0.4136(0.3866) EVAL: [3510/3883] Data 0.001 (0.001) Elapsed 10m 14s (remain 1m 5s) Loss: 0.2691(0.3864) EVAL: [3528/3883] Data 0.001 (0.001) Elapsed 10m 17s (remain 1m 1s) Loss: 0.8080(0.3867) EVAL: [3546/3883] Data 0.001 (0.001) Elapsed 10m 20s (remain 0m 58s) 
Loss: 0.6231(0.3868) EVAL: [3564/3883] Data 0.001 (0.001) Elapsed 10m 24s (remain 0m 55s) Loss: 0.6234(0.3867) EVAL: [3582/3883] Data 0.001 (0.001) Elapsed 10m 27s (remain 0m 52s) Loss: 0.2234(0.3865) EVAL: [3600/3883] Data 0.001 (0.001) Elapsed 10m 30s (remain 0m 49s) Loss: 0.2450(0.3866) EVAL: [3618/3883] Data 0.001 (0.001) Elapsed 10m 33s (remain 0m 46s) Loss: 0.3118(0.3865) EVAL: [3636/3883] Data 0.001 (0.001) Elapsed 10m 36s (remain 0m 43s) Loss: 0.1431(0.3866) EVAL: [3654/3883] Data 0.001 (0.001) Elapsed 10m 39s (remain 0m 39s) Loss: 0.4818(0.3867) EVAL: [3672/3883] Data 0.001 (0.001) Elapsed 10m 42s (remain 0m 36s) Loss: 0.8194(0.3869) EVAL: [3690/3883] Data 0.001 (0.001) Elapsed 10m 46s (remain 0m 33s) Loss: 0.2142(0.3869) EVAL: [3708/3883] Data 0.001 (0.001) Elapsed 10m 49s (remain 0m 30s) Loss: 0.4991(0.3869) EVAL: [3726/3883] Data 0.001 (0.001) Elapsed 10m 52s (remain 0m 27s) Loss: 0.5069(0.3869) EVAL: [3744/3883] Data 0.001 (0.001) Elapsed 10m 55s (remain 0m 24s) Loss: 0.3938(0.3868) EVAL: [3762/3883] Data 0.001 (0.001) Elapsed 10m 58s (remain 0m 21s) Loss: 0.2337(0.3868) EVAL: [3780/3883] Data 0.001 (0.001) Elapsed 11m 1s (remain 0m 17s) Loss: 0.1664(0.3867) EVAL: [3798/3883] Data 0.001 (0.001) Elapsed 11m 5s (remain 0m 14s) Loss: 0.1616(0.3868) EVAL: [3816/3883] Data 0.001 (0.001) Elapsed 11m 8s (remain 0m 11s) Loss: 0.2270(0.3867) EVAL: [3834/3883] Data 0.001 (0.001) Elapsed 11m 11s (remain 0m 8s) Loss: 0.2859(0.3870) EVAL: [3852/3883] Data 0.001 (0.001) Elapsed 11m 14s (remain 0m 5s) Loss: 0.3702(0.3871) EVAL: [3870/3883] Data 0.001 (0.001) Elapsed 11m 17s (remain 0m 2s) Loss: 0.2314(0.3867)
Epoch 1 - avg_train_loss: 0.8602 avg_val_loss: 0.3868 time: 1127s Epoch 1 - AUC: 0.9477652467186527 - pAUC: 0.16142628514962082 Epoch 1 - Save Best Score: 0.1614 Model
EVAL: [3882/3883] Data 0.001 (0.001) Elapsed 11m 19s (remain 0m 0s) Loss: 0.7261(0.3868) Epoch: [2][0/650] Data 0.879 (0.879) Elapsed 0m 1s (remain 14m 29s) Loss: 0.6439(0.6439) Grad: 19.0668 LR: 0.000098 Epoch: [2][18/650] Data 0.269 (0.296) Elapsed 0m 13s (remain 7m 31s) Loss: 0.3772(0.5004) Grad: 11.6672 LR: 0.000098 Epoch: [2][36/650] Data 0.272 (0.282) Elapsed 0m 25s (remain 7m 9s) Loss: 1.1432(0.5320) Grad: 31.3411 LR: 0.000098 Epoch: [2][54/650] Data 0.269 (0.277) Elapsed 0m 38s (remain 6m 53s) Loss: 0.3563(0.5282) Grad: 11.9038 LR: 0.000098 Epoch: [2][72/650] Data 0.266 (0.275) Elapsed 0m 50s (remain 6m 39s) Loss: 0.6747(0.5323) Grad: 20.4311 LR: 0.000098 Epoch: [2][90/650] Data 0.252 (0.273) Elapsed 1m 2s (remain 6m 26s) Loss: 0.9387(0.5260) Grad: 25.1824 LR: 0.000098 Epoch: [2][108/650] Data 0.273 (0.270) Elapsed 1m 15s (remain 6m 13s) Loss: 0.6322(0.5173) Grad: 17.2351 LR: 0.000098 Epoch: [2][126/650] Data 0.273 (0.269) Elapsed 1m 27s (remain 6m 0s) Loss: 0.6374(0.5100) Grad: 17.7875 LR: 0.000098 Epoch: [2][144/650] Data 0.263 (0.268) Elapsed 1m 39s (remain 5m 47s) Loss: 0.4050(0.5000) Grad: 13.3298 LR: 0.000098 Epoch: [2][162/650] Data 0.271 (0.269) Elapsed 1m 52s (remain 5m 34s) Loss: 0.7207(0.5047) Grad: 17.4840 LR: 0.000098 Epoch: [2][180/650] Data 0.272 (0.268) Elapsed 2m 4s (remain 5m 22s) Loss: 0.4872(0.5117) Grad: 12.6906 LR: 0.000098 Epoch: [2][198/650] Data 0.261 (0.268) Elapsed 2m 16s (remain 5m 9s) Loss: 0.4582(0.5071) Grad: 17.1446 LR: 0.000098 Epoch: [2][216/650] Data 0.271 (0.268) Elapsed 2m 29s (remain 4m 57s) Loss: 0.9624(0.5033) Grad: 21.6777 LR: 0.000098 Epoch: [2][234/650] Data 0.259 (0.268) Elapsed 2m 41s (remain 4m 44s) Loss: 0.5756(0.5049) Grad: 14.4410 LR: 0.000098 Epoch: [2][252/650] Data 0.266 (0.268) Elapsed 2m 53s (remain 4m 32s) Loss: 0.3603(0.4992) Grad: 10.9208 LR: 0.000098 Epoch: [2][270/650] Data 0.264 (0.268) Elapsed 3m 5s (remain 4m 20s) Loss: 0.2299(0.4938) Grad: 7.9438 LR: 0.000098 Epoch: [2][288/650] Data 0.264 
(0.268) Elapsed 3m 18s (remain 4m 7s) Loss: 0.4541(0.4956) Grad: 12.0555 LR: 0.000098 Epoch: [2][306/650] Data 0.272 (0.268) Elapsed 3m 30s (remain 3m 55s) Loss: 0.1797(0.4931) Grad: 8.0973 LR: 0.000098 Epoch: [2][324/650] Data 0.265 (0.268) Elapsed 3m 42s (remain 3m 42s) Loss: 0.4167(0.4920) Grad: 12.4313 LR: 0.000098 Epoch: [2][342/650] Data 0.272 (0.268) Elapsed 3m 55s (remain 3m 30s) Loss: 0.1934(0.4859) Grad: 8.9752 LR: 0.000098 Epoch: [2][360/650] Data 0.272 (0.268) Elapsed 4m 7s (remain 3m 18s) Loss: 0.4769(0.4875) Grad: 14.0812 LR: 0.000098 Epoch: [2][378/650] Data 0.271 (0.268) Elapsed 4m 19s (remain 3m 5s) Loss: 0.4079(0.4861) Grad: 11.6847 LR: 0.000098 Epoch: [2][396/650] Data 0.271 (0.268) Elapsed 4m 32s (remain 2m 53s) Loss: 0.2117(0.4866) Grad: 8.8013 LR: 0.000098 Epoch: [2][414/650] Data 0.259 (0.268) Elapsed 4m 44s (remain 2m 41s) Loss: 1.0991(0.4878) Grad: 22.1012 LR: 0.000098 Epoch: [2][432/650] Data 0.264 (0.268) Elapsed 4m 56s (remain 2m 28s) Loss: 0.4128(0.4853) Grad: 11.9749 LR: 0.000098 Epoch: [2][450/650] Data 0.261 (0.268) Elapsed 5m 8s (remain 2m 16s) Loss: 0.4381(0.4861) Grad: 14.3544 LR: 0.000098 Epoch: [2][468/650] Data 0.268 (0.268) Elapsed 5m 21s (remain 2m 3s) Loss: 0.4091(0.4825) Grad: 13.4175 LR: 0.000098 Epoch: [2][486/650] Data 0.266 (0.268) Elapsed 5m 33s (remain 1m 51s) Loss: 0.7158(0.4842) Grad: 21.4496 LR: 0.000098 Epoch: [2][504/650] Data 0.266 (0.268) Elapsed 5m 45s (remain 1m 39s) Loss: 0.3441(0.4838) Grad: 9.2027 LR: 0.000098 Epoch: [2][522/650] Data 0.273 (0.268) Elapsed 5m 58s (remain 1m 26s) Loss: 0.2895(0.4828) Grad: 13.0865 LR: 0.000098 Epoch: [2][540/650] Data 0.273 (0.268) Elapsed 6m 10s (remain 1m 14s) Loss: 0.4308(0.4805) Grad: 16.5972 LR: 0.000098 Epoch: [2][558/650] Data 0.270 (0.268) Elapsed 6m 22s (remain 1m 2s) Loss: 0.3672(0.4803) Grad: 9.5362 LR: 0.000098 Epoch: [2][576/650] Data 0.270 (0.268) Elapsed 6m 35s (remain 0m 49s) Loss: 0.4240(0.4783) Grad: 12.9091 LR: 0.000098 Epoch: [2][594/650] Data 0.271 
(0.268) Elapsed 6m 47s (remain 0m 37s) Loss: 0.5612(0.4772) Grad: 13.3439 LR: 0.000098 Epoch: [2][612/650] Data 0.269 (0.268) Elapsed 6m 59s (remain 0m 25s) Loss: 0.3119(0.4742) Grad: 8.1953 LR: 0.000098 Epoch: [2][630/650] Data 0.263 (0.268) Elapsed 7m 12s (remain 0m 13s) Loss: 0.4496(0.4717) Grad: 13.7777 LR: 0.000098 Epoch: [2][648/650] Data 0.259 (0.268) Elapsed 7m 24s (remain 0m 0s) Loss: 0.3236(0.4706) Grad: 12.8262 LR: 0.000098 Epoch: [2][649/650] Data 0.273 (0.268) Elapsed 7m 25s (remain 0m 0s) Loss: 0.3428(0.4704) Grad: 16.1197 LR: 0.000098 EVAL: [0/3883] Data 0.588 (0.588) Elapsed 0m 0s (remain 49m 25s) Loss: 0.1770(0.1770) EVAL: [18/3883] Data 0.001 (0.032) Elapsed 0m 3s (remain 13m 17s) Loss: 0.1737(0.1417) EVAL: [36/3883] Data 0.001 (0.017) Elapsed 0m 7s (remain 12m 14s) Loss: 0.1269(0.1333) EVAL: [54/3883] Data 0.001 (0.012) Elapsed 0m 10s (remain 11m 51s) Loss: 0.0539(0.1329) EVAL: [72/3883] Data 0.002 (0.009) Elapsed 0m 13s (remain 11m 37s) Loss: 0.2033(0.1438) EVAL: [90/3883] Data 0.002 (0.007) Elapsed 0m 16s (remain 11m 28s) Loss: 0.2019(0.1487) EVAL: [108/3883] Data 0.001 (0.006) Elapsed 0m 19s (remain 11m 21s) Loss: 0.3960(0.1579) EVAL: [126/3883] Data 0.002 (0.006) Elapsed 0m 22s (remain 11m 15s) Loss: 0.2764(0.1630) EVAL: [144/3883] Data 0.001 (0.005) Elapsed 0m 25s (remain 11m 10s) Loss: 0.2941(0.1641) EVAL: [162/3883] Data 0.001 (0.005) Elapsed 0m 29s (remain 11m 5s) Loss: 0.1169(0.1649) EVAL: [180/3883] Data 0.001 (0.004) Elapsed 0m 32s (remain 11m 0s) Loss: 0.0275(0.1607) EVAL: [198/3883] Data 0.001 (0.004) Elapsed 0m 35s (remain 10m 56s) Loss: 0.1268(0.1629) EVAL: [216/3883] Data 0.001 (0.004) Elapsed 0m 38s (remain 10m 52s) Loss: 0.0800(0.1617) EVAL: [234/3883] Data 0.001 (0.003) Elapsed 0m 41s (remain 10m 48s) Loss: 0.1952(0.1608) EVAL: [252/3883] Data 0.001 (0.003) Elapsed 0m 44s (remain 10m 44s) Loss: 0.1427(0.1578) EVAL: [270/3883] Data 0.001 (0.003) Elapsed 0m 48s (remain 10m 40s) Loss: 0.1033(0.1594) EVAL: [288/3883] Data 0.001 
(0.003) Elapsed 0m 51s (remain 10m 36s) Loss: 0.2294(0.1623) EVAL: [306/3883] Data 0.001 (0.003) Elapsed 0m 54s (remain 10m 33s) Loss: 0.1408(0.1620) EVAL: [324/3883] Data 0.001 (0.003) Elapsed 0m 57s (remain 10m 29s) Loss: 0.0960(0.1612) EVAL: [342/3883] Data 0.001 (0.003) Elapsed 1m 0s (remain 10m 25s) Loss: 0.0254(0.1617) EVAL: [360/3883] Data 0.001 (0.003) Elapsed 1m 3s (remain 10m 22s) Loss: 0.1140(0.1621) EVAL: [378/3883] Data 0.001 (0.003) Elapsed 1m 6s (remain 10m 19s) Loss: 0.2514(0.1625) EVAL: [396/3883] Data 0.001 (0.002) Elapsed 1m 10s (remain 10m 15s) Loss: 0.2673(0.1622) EVAL: [414/3883] Data 0.001 (0.002) Elapsed 1m 13s (remain 10m 12s) Loss: 0.1760(0.1623) EVAL: [432/3883] Data 0.001 (0.002) Elapsed 1m 16s (remain 10m 8s) Loss: 0.1059(0.1627) EVAL: [450/3883] Data 0.001 (0.002) Elapsed 1m 19s (remain 10m 5s) Loss: 0.2345(0.1621) EVAL: [468/3883] Data 0.001 (0.002) Elapsed 1m 22s (remain 10m 2s) Loss: 0.1300(0.1633) EVAL: [486/3883] Data 0.001 (0.002) Elapsed 1m 25s (remain 9m 58s) Loss: 0.1591(0.1626) EVAL: [504/3883] Data 0.001 (0.002) Elapsed 1m 29s (remain 9m 55s) Loss: 0.0984(0.1630) EVAL: [522/3883] Data 0.001 (0.002) Elapsed 1m 32s (remain 9m 52s) Loss: 0.3431(0.1625) EVAL: [540/3883] Data 0.001 (0.002) Elapsed 1m 35s (remain 9m 48s) Loss: 0.0756(0.1616) EVAL: [558/3883] Data 0.001 (0.002) Elapsed 1m 38s (remain 9m 45s) Loss: 0.3011(0.1623) EVAL: [576/3883] Data 0.001 (0.002) Elapsed 1m 41s (remain 9m 42s) Loss: 0.0651(0.1615) EVAL: [594/3883] Data 0.001 (0.002) Elapsed 1m 44s (remain 9m 38s) Loss: 0.1147(0.1603) EVAL: [612/3883] Data 0.001 (0.002) Elapsed 1m 47s (remain 9m 35s) Loss: 0.1375(0.1602) EVAL: [630/3883] Data 0.001 (0.002) Elapsed 1m 51s (remain 9m 32s) Loss: 0.0742(0.1603) EVAL: [648/3883] Data 0.001 (0.002) Elapsed 1m 54s (remain 9m 29s) Loss: 0.0578(0.1606) EVAL: [666/3883] Data 0.001 (0.002) Elapsed 1m 57s (remain 9m 25s) Loss: 0.0681(0.1606) EVAL: [684/3883] Data 0.001 (0.002) Elapsed 2m 0s (remain 9m 22s) Loss: 0.1984(0.1612) 
EVAL: [702/3883] Data 0.001 (0.002) Elapsed 2m 3s (remain 9m 19s) Loss: 0.2155(0.1596) EVAL: [720/3883] Data 0.001 (0.002) Elapsed 2m 6s (remain 9m 16s) Loss: 0.1946(0.1593) EVAL: [738/3883] Data 0.001 (0.002) Elapsed 2m 9s (remain 9m 12s) Loss: 0.0460(0.1592) EVAL: [756/3883] Data 0.001 (0.002) Elapsed 2m 13s (remain 9m 9s) Loss: 0.2104(0.1591) EVAL: [774/3883] Data 0.001 (0.002) Elapsed 2m 16s (remain 9m 6s) Loss: 0.1124(0.1593) EVAL: [792/3883] Data 0.001 (0.002) Elapsed 2m 19s (remain 9m 3s) Loss: 0.1982(0.1603) EVAL: [810/3883] Data 0.001 (0.002) Elapsed 2m 22s (remain 9m 0s) Loss: 0.0537(0.1604) EVAL: [828/3883] Data 0.001 (0.002) Elapsed 2m 25s (remain 8m 56s) Loss: 0.1227(0.1606) EVAL: [846/3883] Data 0.001 (0.002) Elapsed 2m 28s (remain 8m 53s) Loss: 0.0865(0.1609) EVAL: [864/3883] Data 0.002 (0.002) Elapsed 2m 32s (remain 8m 50s) Loss: 0.2704(0.1620) EVAL: [882/3883] Data 0.001 (0.002) Elapsed 2m 35s (remain 8m 47s) Loss: 0.0305(0.1618) EVAL: [900/3883] Data 0.001 (0.002) Elapsed 2m 38s (remain 8m 44s) Loss: 0.2715(0.1618) EVAL: [918/3883] Data 0.001 (0.002) Elapsed 2m 41s (remain 8m 40s) Loss: 0.1925(0.1619) EVAL: [936/3883] Data 0.002 (0.002) Elapsed 2m 44s (remain 8m 37s) Loss: 0.2288(0.1619) EVAL: [954/3883] Data 0.001 (0.002) Elapsed 2m 47s (remain 8m 34s) Loss: 0.1014(0.1612) EVAL: [972/3883] Data 0.001 (0.002) Elapsed 2m 50s (remain 8m 31s) Loss: 0.0365(0.1615) EVAL: [990/3883] Data 0.001 (0.002) Elapsed 2m 54s (remain 8m 28s) Loss: 0.1667(0.1610) EVAL: [1008/3883] Data 0.001 (0.002) Elapsed 2m 57s (remain 8m 24s) Loss: 0.2637(0.1615) EVAL: [1026/3883] Data 0.001 (0.001) Elapsed 3m 0s (remain 8m 21s) Loss: 0.0475(0.1606) EVAL: [1044/3883] Data 0.001 (0.001) Elapsed 3m 3s (remain 8m 18s) Loss: 0.0820(0.1605) EVAL: [1062/3883] Data 0.001 (0.001) Elapsed 3m 6s (remain 8m 15s) Loss: 0.1155(0.1606) EVAL: [1080/3883] Data 0.001 (0.001) Elapsed 3m 9s (remain 8m 12s) Loss: 0.1442(0.1606) EVAL: [1098/3883] Data 0.001 (0.001) Elapsed 3m 13s (remain 8m 8s) 
Loss: 0.1222(0.1603) EVAL: [1116/3883] Data 0.001 (0.001) Elapsed 3m 16s (remain 8m 5s) Loss: 0.2785(0.1607) EVAL: [1134/3883] Data 0.001 (0.001) Elapsed 3m 19s (remain 8m 2s) Loss: 0.0810(0.1603) EVAL: [1152/3883] Data 0.001 (0.001) Elapsed 3m 22s (remain 7m 59s) Loss: 0.1948(0.1604) EVAL: [1170/3883] Data 0.001 (0.001) Elapsed 3m 25s (remain 7m 56s) Loss: 0.1612(0.1600) EVAL: [1188/3883] Data 0.001 (0.001) Elapsed 3m 28s (remain 7m 52s) Loss: 0.1162(0.1601) EVAL: [1206/3883] Data 0.001 (0.001) Elapsed 3m 31s (remain 7m 49s) Loss: 0.0833(0.1601) EVAL: [1224/3883] Data 0.002 (0.001) Elapsed 3m 35s (remain 7m 46s) Loss: 0.2616(0.1601) EVAL: [1242/3883] Data 0.001 (0.001) Elapsed 3m 38s (remain 7m 43s) Loss: 0.1812(0.1601) EVAL: [1260/3883] Data 0.001 (0.001) Elapsed 3m 41s (remain 7m 40s) Loss: 0.3908(0.1606) EVAL: [1278/3883] Data 0.001 (0.001) Elapsed 3m 44s (remain 7m 37s) Loss: 0.2581(0.1607) EVAL: [1296/3883] Data 0.001 (0.001) Elapsed 3m 47s (remain 7m 33s) Loss: 0.0572(0.1606) EVAL: [1314/3883] Data 0.001 (0.001) Elapsed 3m 50s (remain 7m 30s) Loss: 0.1650(0.1609) EVAL: [1332/3883] Data 0.001 (0.001) Elapsed 3m 53s (remain 7m 27s) Loss: 0.1224(0.1605) EVAL: [1350/3883] Data 0.001 (0.001) Elapsed 3m 57s (remain 7m 24s) Loss: 0.0769(0.1602) EVAL: [1368/3883] Data 0.001 (0.001) Elapsed 4m 0s (remain 7m 21s) Loss: 0.4134(0.1610) EVAL: [1386/3883] Data 0.001 (0.001) Elapsed 4m 3s (remain 7m 18s) Loss: 0.0569(0.1607) EVAL: [1404/3883] Data 0.001 (0.001) Elapsed 4m 6s (remain 7m 14s) Loss: 0.0992(0.1608) EVAL: [1422/3883] Data 0.001 (0.001) Elapsed 4m 9s (remain 7m 11s) Loss: 0.1436(0.1606) EVAL: [1440/3883] Data 0.001 (0.001) Elapsed 4m 12s (remain 7m 8s) Loss: 0.1273(0.1602) EVAL: [1458/3883] Data 0.001 (0.001) Elapsed 4m 16s (remain 7m 5s) Loss: 0.0942(0.1604) EVAL: [1476/3883] Data 0.001 (0.001) Elapsed 4m 19s (remain 7m 2s) Loss: 0.0917(0.1602) EVAL: [1494/3883] Data 0.001 (0.001) Elapsed 4m 22s (remain 6m 59s) Loss: 0.3089(0.1598) EVAL: [1512/3883] Data 0.001 
(0.001) Elapsed 4m 25s (remain 6m 55s) Loss: 0.3519(0.1596) EVAL: [1530/3883] Data 0.001 (0.001) Elapsed 4m 28s (remain 6m 52s) Loss: 0.3325(0.1597) EVAL: [1548/3883] Data 0.001 (0.001) Elapsed 4m 31s (remain 6m 49s) Loss: 0.2314(0.1595) EVAL: [1566/3883] Data 0.001 (0.001) Elapsed 4m 34s (remain 6m 46s) Loss: 0.1941(0.1592) EVAL: [1584/3883] Data 0.001 (0.001) Elapsed 4m 38s (remain 6m 43s) Loss: 0.0591(0.1592) EVAL: [1602/3883] Data 0.001 (0.001) Elapsed 4m 41s (remain 6m 40s) Loss: 0.3192(0.1595) EVAL: [1620/3883] Data 0.001 (0.001) Elapsed 4m 44s (remain 6m 36s) Loss: 0.2999(0.1602) EVAL: [1638/3883] Data 0.001 (0.001) Elapsed 4m 47s (remain 6m 33s) Loss: 0.1067(0.1603) EVAL: [1656/3883] Data 0.001 (0.001) Elapsed 4m 50s (remain 6m 30s) Loss: 0.1351(0.1604) EVAL: [1674/3883] Data 0.001 (0.001) Elapsed 4m 53s (remain 6m 27s) Loss: 0.2332(0.1606) EVAL: [1692/3883] Data 0.001 (0.001) Elapsed 4m 56s (remain 6m 24s) Loss: 0.0895(0.1603) EVAL: [1710/3883] Data 0.001 (0.001) Elapsed 5m 0s (remain 6m 20s) Loss: 0.1498(0.1608) EVAL: [1728/3883] Data 0.001 (0.001) Elapsed 5m 3s (remain 6m 17s) Loss: 0.0905(0.1610) EVAL: [1746/3883] Data 0.001 (0.001) Elapsed 5m 6s (remain 6m 14s) Loss: 0.1193(0.1610) EVAL: [1764/3883] Data 0.001 (0.001) Elapsed 5m 9s (remain 6m 11s) Loss: 0.1140(0.1614) EVAL: [1782/3883] Data 0.001 (0.001) Elapsed 5m 12s (remain 6m 8s) Loss: 0.3481(0.1615) EVAL: [1800/3883] Data 0.001 (0.001) Elapsed 5m 15s (remain 6m 5s) Loss: 0.2563(0.1613) EVAL: [1818/3883] Data 0.001 (0.001) Elapsed 5m 19s (remain 6m 2s) Loss: 0.0577(0.1613) EVAL: [1836/3883] Data 0.001 (0.001) Elapsed 5m 22s (remain 5m 58s) Loss: 0.0808(0.1614) EVAL: [1854/3883] Data 0.001 (0.001) Elapsed 5m 25s (remain 5m 55s) Loss: 0.0103(0.1611) EVAL: [1872/3883] Data 0.001 (0.001) Elapsed 5m 28s (remain 5m 52s) Loss: 0.3210(0.1612) EVAL: [1890/3883] Data 0.001 (0.001) Elapsed 5m 31s (remain 5m 49s) Loss: 0.0612(0.1612) EVAL: [1908/3883] Data 0.001 (0.001) Elapsed 5m 34s (remain 5m 46s) Loss: 
0.1735(0.1613) EVAL: [1926/3883] Data 0.001 (0.001) Elapsed 5m 37s (remain 5m 43s) Loss: 0.0412(0.1612) EVAL: [1944/3883] Data 0.001 (0.001) Elapsed 5m 41s (remain 5m 39s) Loss: 0.0557(0.1613) EVAL: [1962/3883] Data 0.001 (0.001) Elapsed 5m 44s (remain 5m 36s) Loss: 0.0927(0.1616) EVAL: [1980/3883] Data 0.001 (0.001) Elapsed 5m 47s (remain 5m 33s) Loss: 0.2122(0.1617) EVAL: [1998/3883] Data 0.001 (0.001) Elapsed 5m 50s (remain 5m 30s) Loss: 0.1713(0.1618) EVAL: [2016/3883] Data 0.001 (0.001) Elapsed 5m 53s (remain 5m 27s) Loss: 0.1183(0.1620) EVAL: [2034/3883] Data 0.001 (0.001) Elapsed 5m 56s (remain 5m 24s) Loss: 0.0553(0.1618) EVAL: [2052/3883] Data 0.001 (0.001) Elapsed 5m 59s (remain 5m 20s) Loss: 0.3482(0.1618) EVAL: [2070/3883] Data 0.001 (0.001) Elapsed 6m 3s (remain 5m 17s) Loss: 0.2087(0.1621) EVAL: [2088/3883] Data 0.001 (0.001) Elapsed 6m 6s (remain 5m 14s) Loss: 0.0497(0.1619) EVAL: [2106/3883] Data 0.001 (0.001) Elapsed 6m 9s (remain 5m 11s) Loss: 0.1427(0.1619) EVAL: [2124/3883] Data 0.001 (0.001) Elapsed 6m 12s (remain 5m 8s) Loss: 0.0933(0.1616) EVAL: [2142/3883] Data 0.002 (0.001) Elapsed 6m 15s (remain 5m 5s) Loss: 0.1509(0.1615) EVAL: [2160/3883] Data 0.001 (0.001) Elapsed 6m 18s (remain 5m 1s) Loss: 0.2055(0.1613) EVAL: [2178/3883] Data 0.001 (0.001) Elapsed 6m 22s (remain 4m 58s) Loss: 0.0383(0.1609) EVAL: [2196/3883] Data 0.001 (0.001) Elapsed 6m 25s (remain 4m 55s) Loss: 0.0791(0.1609) EVAL: [2214/3883] Data 0.001 (0.001) Elapsed 6m 28s (remain 4m 52s) Loss: 0.1740(0.1608) EVAL: [2232/3883] Data 0.001 (0.001) Elapsed 6m 31s (remain 4m 49s) Loss: 0.3226(0.1609) EVAL: [2250/3883] Data 0.001 (0.001) Elapsed 6m 34s (remain 4m 46s) Loss: 0.4714(0.1612) EVAL: [2268/3883] Data 0.001 (0.001) Elapsed 6m 37s (remain 4m 42s) Loss: 0.1312(0.1612) EVAL: [2286/3883] Data 0.001 (0.001) Elapsed 6m 40s (remain 4m 39s) Loss: 0.2954(0.1613) EVAL: [2304/3883] Data 0.001 (0.001) Elapsed 6m 44s (remain 4m 36s) Loss: 0.1997(0.1611) EVAL: [2322/3883] Data 0.001 
(0.001) Elapsed 6m 47s (remain 4m 33s) Loss: 0.0700(0.1611) EVAL: [2340/3883] Data 0.001 (0.001) Elapsed 6m 50s (remain 4m 30s) Loss: 0.1696(0.1611) EVAL: [2358/3883] Data 0.001 (0.001) Elapsed 6m 53s (remain 4m 27s) Loss: 0.1497(0.1609) EVAL: [2376/3883] Data 0.001 (0.001) Elapsed 6m 56s (remain 4m 24s) Loss: 0.3126(0.1610) EVAL: [2394/3883] Data 0.001 (0.001) Elapsed 6m 59s (remain 4m 20s) Loss: 0.3481(0.1608) EVAL: [2412/3883] Data 0.002 (0.001) Elapsed 7m 3s (remain 4m 17s) Loss: 0.2191(0.1607) EVAL: [2430/3883] Data 0.001 (0.001) Elapsed 7m 6s (remain 4m 14s) Loss: 0.0873(0.1605) EVAL: [2448/3883] Data 0.001 (0.001) Elapsed 7m 9s (remain 4m 11s) Loss: 0.1328(0.1602) EVAL: [2466/3883] Data 0.001 (0.001) Elapsed 7m 12s (remain 4m 8s) Loss: 0.0985(0.1602) EVAL: [2484/3883] Data 0.001 (0.001) Elapsed 7m 15s (remain 4m 5s) Loss: 0.3652(0.1605) EVAL: [2502/3883] Data 0.001 (0.001) Elapsed 7m 18s (remain 4m 1s) Loss: 0.0634(0.1607) EVAL: [2520/3883] Data 0.001 (0.001) Elapsed 7m 21s (remain 3m 58s) Loss: 0.1097(0.1607) EVAL: [2538/3883] Data 0.001 (0.001) Elapsed 7m 25s (remain 3m 55s) Loss: 0.1674(0.1611) EVAL: [2556/3883] Data 0.001 (0.001) Elapsed 7m 28s (remain 3m 52s) Loss: 0.4341(0.1610) EVAL: [2574/3883] Data 0.001 (0.001) Elapsed 7m 31s (remain 3m 49s) Loss: 0.4961(0.1610) EVAL: [2592/3883] Data 0.001 (0.001) Elapsed 7m 34s (remain 3m 46s) Loss: 0.0531(0.1612) EVAL: [2610/3883] Data 0.001 (0.001) Elapsed 7m 37s (remain 3m 42s) Loss: 0.1743(0.1613) EVAL: [2628/3883] Data 0.002 (0.001) Elapsed 7m 40s (remain 3m 39s) Loss: 0.0543(0.1614) EVAL: [2646/3883] Data 0.001 (0.001) Elapsed 7m 43s (remain 3m 36s) Loss: 0.2201(0.1613) EVAL: [2664/3883] Data 0.001 (0.001) Elapsed 7m 47s (remain 3m 33s) Loss: 0.1047(0.1611) EVAL: [2682/3883] Data 0.001 (0.001) Elapsed 7m 50s (remain 3m 30s) Loss: 0.0751(0.1609) EVAL: [2700/3883] Data 0.001 (0.001) Elapsed 7m 53s (remain 3m 27s) Loss: 0.0896(0.1608) EVAL: [2718/3883] Data 0.001 (0.001) Elapsed 7m 56s (remain 3m 24s) Loss: 
0.2584(0.1608) EVAL: [2736/3883] Data 0.001 (0.001) Elapsed 7m 59s (remain 3m 20s) Loss: 0.0306(0.1610) EVAL: [2754/3883] Data 0.001 (0.001) Elapsed 8m 2s (remain 3m 17s) Loss: 0.1155(0.1615) EVAL: [2772/3883] Data 0.001 (0.001) Elapsed 8m 6s (remain 3m 14s) Loss: 0.2865(0.1616) EVAL: [2790/3883] Data 0.001 (0.001) Elapsed 8m 9s (remain 3m 11s) Loss: 0.0568(0.1615) EVAL: [2808/3883] Data 0.001 (0.001) Elapsed 8m 12s (remain 3m 8s) Loss: 0.2481(0.1615) EVAL: [2826/3883] Data 0.001 (0.001) Elapsed 8m 15s (remain 3m 5s) Loss: 0.1486(0.1613) EVAL: [2844/3883] Data 0.001 (0.001) Elapsed 8m 18s (remain 3m 1s) Loss: 0.1604(0.1613) EVAL: [2862/3883] Data 0.001 (0.001) Elapsed 8m 21s (remain 2m 58s) Loss: 0.4025(0.1612) EVAL: [2880/3883] Data 0.001 (0.001) Elapsed 8m 24s (remain 2m 55s) Loss: 0.0399(0.1610) EVAL: [2898/3883] Data 0.001 (0.001) Elapsed 8m 28s (remain 2m 52s) Loss: 0.2110(0.1609) EVAL: [2916/3883] Data 0.001 (0.001) Elapsed 8m 31s (remain 2m 49s) Loss: 0.2526(0.1608) EVAL: [2934/3883] Data 0.001 (0.001) Elapsed 8m 34s (remain 2m 46s) Loss: 0.2118(0.1607) EVAL: [2952/3883] Data 0.001 (0.001) Elapsed 8m 37s (remain 2m 42s) Loss: 0.0318(0.1607) EVAL: [2970/3883] Data 0.002 (0.001) Elapsed 8m 40s (remain 2m 39s) Loss: 0.0850(0.1606) EVAL: [2988/3883] Data 0.001 (0.001) Elapsed 8m 43s (remain 2m 36s) Loss: 0.0990(0.1606) EVAL: [3006/3883] Data 0.001 (0.001) Elapsed 8m 46s (remain 2m 33s) Loss: 0.1794(0.1605) EVAL: [3024/3883] Data 0.001 (0.001) Elapsed 8m 50s (remain 2m 30s) Loss: 0.1962(0.1606) EVAL: [3042/3883] Data 0.001 (0.001) Elapsed 8m 53s (remain 2m 27s) Loss: 0.0895(0.1604) EVAL: [3060/3883] Data 0.001 (0.001) Elapsed 8m 56s (remain 2m 24s) Loss: 0.1145(0.1603) EVAL: [3078/3883] Data 0.002 (0.001) Elapsed 8m 59s (remain 2m 20s) Loss: 0.1884(0.1603) EVAL: [3096/3883] Data 0.001 (0.001) Elapsed 9m 2s (remain 2m 17s) Loss: 0.5165(0.1604) EVAL: [3114/3883] Data 0.001 (0.001) Elapsed 9m 5s (remain 2m 14s) Loss: 0.2237(0.1604) EVAL: [3132/3883] Data 0.001 
(0.001) Elapsed 9m 9s (remain 2m 11s) Loss: 0.1928(0.1603) EVAL: [3150/3883] Data 0.001 (0.001) Elapsed 9m 12s (remain 2m 8s) Loss: 0.1161(0.1602) EVAL: [3168/3883] Data 0.001 (0.001) Elapsed 9m 15s (remain 2m 5s) Loss: 0.4552(0.1601) EVAL: [3186/3883] Data 0.001 (0.001) Elapsed 9m 18s (remain 2m 1s) Loss: 0.0659(0.1600) EVAL: [3204/3883] Data 0.001 (0.001) Elapsed 9m 21s (remain 1m 58s) Loss: 0.1388(0.1600) EVAL: [3222/3883] Data 0.001 (0.001) Elapsed 9m 24s (remain 1m 55s) Loss: 0.1322(0.1600) EVAL: [3240/3883] Data 0.001 (0.001) Elapsed 9m 27s (remain 1m 52s) Loss: 0.4383(0.1601) EVAL: [3258/3883] Data 0.001 (0.001) Elapsed 9m 31s (remain 1m 49s) Loss: 0.0369(0.1601) EVAL: [3276/3883] Data 0.002 (0.001) Elapsed 9m 34s (remain 1m 46s) Loss: 0.1538(0.1600) EVAL: [3294/3883] Data 0.001 (0.001) Elapsed 9m 37s (remain 1m 43s) Loss: 0.1418(0.1601) EVAL: [3312/3883] Data 0.001 (0.001) Elapsed 9m 40s (remain 1m 39s) Loss: 0.0165(0.1602) EVAL: [3330/3883] Data 0.001 (0.001) Elapsed 9m 43s (remain 1m 36s) Loss: 0.2204(0.1600) EVAL: [3348/3883] Data 0.001 (0.001) Elapsed 9m 46s (remain 1m 33s) Loss: 0.1072(0.1600) EVAL: [3366/3883] Data 0.001 (0.001) Elapsed 9m 50s (remain 1m 30s) Loss: 0.0789(0.1599) EVAL: [3384/3883] Data 0.001 (0.001) Elapsed 9m 53s (remain 1m 27s) Loss: 0.2069(0.1600) EVAL: [3402/3883] Data 0.001 (0.001) Elapsed 9m 56s (remain 1m 24s) Loss: 0.2183(0.1601) EVAL: [3420/3883] Data 0.001 (0.001) Elapsed 9m 59s (remain 1m 20s) Loss: 0.2647(0.1599) EVAL: [3438/3883] Data 0.001 (0.001) Elapsed 10m 2s (remain 1m 17s) Loss: 0.1387(0.1601) EVAL: [3456/3883] Data 0.001 (0.001) Elapsed 10m 5s (remain 1m 14s) Loss: 0.2248(0.1601) EVAL: [3474/3883] Data 0.001 (0.001) Elapsed 10m 8s (remain 1m 11s) Loss: 0.0411(0.1601) EVAL: [3492/3883] Data 0.001 (0.001) Elapsed 10m 12s (remain 1m 8s) Loss: 0.1656(0.1602) EVAL: [3510/3883] Data 0.001 (0.001) Elapsed 10m 15s (remain 1m 5s) Loss: 0.0819(0.1601) EVAL: [3528/3883] Data 0.001 (0.001) Elapsed 10m 18s (remain 1m 2s) Loss: 
0.5300(0.1602) EVAL: [3546/3883] Data 0.001 (0.001) Elapsed 10m 21s (remain 0m 58s) Loss: 0.2949(0.1603) EVAL: [3564/3883] Data 0.001 (0.001) Elapsed 10m 24s (remain 0m 55s) Loss: 0.2626(0.1601) EVAL: [3582/3883] Data 0.001 (0.001) Elapsed 10m 27s (remain 0m 52s) Loss: 0.0641(0.1601) EVAL: [3600/3883] Data 0.001 (0.001) Elapsed 10m 30s (remain 0m 49s) Loss: 0.1033(0.1601) EVAL: [3618/3883] Data 0.001 (0.001) Elapsed 10m 34s (remain 0m 46s) Loss: 0.2881(0.1601) EVAL: [3636/3883] Data 0.001 (0.001) Elapsed 10m 37s (remain 0m 43s) Loss: 0.1004(0.1603) EVAL: [3654/3883] Data 0.001 (0.001) Elapsed 10m 40s (remain 0m 39s) Loss: 0.1796(0.1602) EVAL: [3672/3883] Data 0.001 (0.001) Elapsed 10m 43s (remain 0m 36s) Loss: 0.3772(0.1604) EVAL: [3690/3883] Data 0.001 (0.001) Elapsed 10m 46s (remain 0m 33s) Loss: 0.0389(0.1604) EVAL: [3708/3883] Data 0.001 (0.001) Elapsed 10m 49s (remain 0m 30s) Loss: 0.1311(0.1605) EVAL: [3726/3883] Data 0.002 (0.001) Elapsed 10m 53s (remain 0m 27s) Loss: 0.1450(0.1604) EVAL: [3744/3883] Data 0.001 (0.001) Elapsed 10m 56s (remain 0m 24s) Loss: 0.1706(0.1603) EVAL: [3762/3883] Data 0.001 (0.001) Elapsed 10m 59s (remain 0m 21s) Loss: 0.0853(0.1603) EVAL: [3780/3883] Data 0.001 (0.001) Elapsed 11m 2s (remain 0m 17s) Loss: 0.0261(0.1602) EVAL: [3798/3883] Data 0.001 (0.001) Elapsed 11m 5s (remain 0m 14s) Loss: 0.0367(0.1603) EVAL: [3816/3883] Data 0.001 (0.001) Elapsed 11m 8s (remain 0m 11s) Loss: 0.0487(0.1602) EVAL: [3834/3883] Data 0.001 (0.001) Elapsed 11m 11s (remain 0m 8s) Loss: 0.0623(0.1603) EVAL: [3852/3883] Data 0.001 (0.001) Elapsed 11m 15s (remain 0m 5s) Loss: 0.2256(0.1603) EVAL: [3870/3883] Data 0.001 (0.001) Elapsed 11m 18s (remain 0m 2s) Loss: 0.1332(0.1601)
Epoch 2 - avg_train_loss: 0.4704 avg_val_loss: 0.1602 time: 1126s Epoch 2 - AUC: 0.9479023007024554 - pAUC: 0.16179566496960318 Epoch 2 - Save Best Score: 0.1618 Model
EVAL: [3882/3883] Data 0.001 (0.001) Elapsed 11m 20s (remain 0m 0s) Loss: 0.1898(0.1602) Epoch: [3][0/650] Data 0.828 (0.828) Elapsed 0m 1s (remain 13m 58s) Loss: 0.3784(0.3784) Grad: 11.2917 LR: 0.000091 Epoch: [3][18/650] Data 0.268 (0.293) Elapsed 0m 13s (remain 7m 30s) Loss: 0.2873(0.4024) Grad: 10.2432 LR: 0.000091 Epoch: [3][36/650] Data 0.272 (0.281) Elapsed 0m 25s (remain 7m 8s) Loss: 0.9090(0.4081) Grad: 28.8463 LR: 0.000091 Epoch: [3][54/650] Data 0.261 (0.277) Elapsed 0m 38s (remain 6m 52s) Loss: 0.2889(0.4113) Grad: 9.4664 LR: 0.000091 Epoch: [3][72/650] Data 0.264 (0.275) Elapsed 0m 50s (remain 6m 38s) Loss: 0.4738(0.4275) Grad: 12.0205 LR: 0.000091 Epoch: [3][90/650] Data 0.251 (0.274) Elapsed 1m 2s (remain 6m 25s) Loss: 1.1058(0.4364) Grad: 30.0984 LR: 0.000091 Epoch: [3][108/650] Data 0.268 (0.272) Elapsed 1m 15s (remain 6m 12s) Loss: 0.4917(0.4317) Grad: 11.7421 LR: 0.000091 Epoch: [3][126/650] Data 0.252 (0.271) Elapsed 1m 27s (remain 5m 59s) Loss: 0.5872(0.4287) Grad: 16.3690 LR: 0.000091 Epoch: [3][144/650] Data 0.270 (0.271) Elapsed 1m 39s (remain 5m 47s) Loss: 0.4533(0.4239) Grad: 9.2913 LR: 0.000091 Epoch: [3][162/650] Data 0.264 (0.271) Elapsed 1m 52s (remain 5m 34s) Loss: 0.4157(0.4164) Grad: 10.7173 LR: 0.000091 Epoch: [3][180/650] Data 0.267 (0.269) Elapsed 2m 4s (remain 5m 22s) Loss: 0.6966(0.4287) Grad: 15.1735 LR: 0.000091 Epoch: [3][198/650] Data 0.273 (0.269) Elapsed 2m 16s (remain 5m 9s) Loss: 0.5989(0.4238) Grad: 22.1596 LR: 0.000091 Epoch: [3][216/650] Data 0.272 (0.268) Elapsed 2m 28s (remain 4m 57s) Loss: 0.3471(0.4190) Grad: 10.7571 LR: 0.000091 Epoch: [3][234/650] Data 0.273 (0.268) Elapsed 2m 41s (remain 4m 44s) Loss: 0.3999(0.4224) Grad: 11.3680 LR: 0.000091 Epoch: [3][252/650] Data 0.267 (0.268) Elapsed 2m 53s (remain 4m 32s) Loss: 0.2138(0.4191) Grad: 7.7851 LR: 0.000091 Epoch: [3][270/650] Data 0.263 (0.268) Elapsed 3m 5s (remain 4m 19s) Loss: 0.3282(0.4192) Grad: 8.2922 LR: 0.000091 Epoch: [3][288/650] Data 0.271 (0.269) 
Elapsed 3m 18s (remain 4m 7s) Loss: 0.3682(0.4217) Grad: 11.6487 LR: 0.000091 Epoch: [3][306/650] Data 0.272 (0.269) Elapsed 3m 30s (remain 3m 55s) Loss: 0.3138(0.4248) Grad: 11.0097 LR: 0.000091 Epoch: [3][324/650] Data 0.271 (0.269) Elapsed 3m 42s (remain 3m 42s) Loss: 0.1658(0.4227) Grad: 7.1503 LR: 0.000091 Epoch: [3][342/650] Data 0.271 (0.269) Elapsed 3m 55s (remain 3m 30s) Loss: 0.3155(0.4205) Grad: 8.8760 LR: 0.000091 Epoch: [3][360/650] Data 0.266 (0.269) Elapsed 4m 7s (remain 3m 18s) Loss: 0.3396(0.4193) Grad: 10.1301 LR: 0.000091 Epoch: [3][378/650] Data 0.266 (0.269) Elapsed 4m 19s (remain 3m 5s) Loss: 0.5675(0.4191) Grad: 14.2463 LR: 0.000091 Epoch: [3][396/650] Data 0.270 (0.269) Elapsed 4m 32s (remain 2m 53s) Loss: 0.4167(0.4187) Grad: 12.4221 LR: 0.000091 Epoch: [3][414/650] Data 0.258 (0.269) Elapsed 4m 44s (remain 2m 41s) Loss: 0.6098(0.4167) Grad: 13.9772 LR: 0.000091 Epoch: [3][432/650] Data 0.261 (0.268) Elapsed 4m 56s (remain 2m 28s) Loss: 0.3655(0.4157) Grad: 9.4036 LR: 0.000091 Epoch: [3][450/650] Data 0.272 (0.268) Elapsed 5m 8s (remain 2m 16s) Loss: 0.4631(0.4175) Grad: 11.3350 LR: 0.000091 Epoch: [3][468/650] Data 0.259 (0.268) Elapsed 5m 21s (remain 2m 3s) Loss: 0.2234(0.4194) Grad: 7.3291 LR: 0.000091 Epoch: [3][486/650] Data 0.271 (0.268) Elapsed 5m 33s (remain 1m 51s) Loss: 0.2298(0.4184) Grad: 9.0037 LR: 0.000091 Epoch: [3][504/650] Data 0.272 (0.268) Elapsed 5m 45s (remain 1m 39s) Loss: 0.1840(0.4182) Grad: 6.3553 LR: 0.000091 Epoch: [3][522/650] Data 0.271 (0.268) Elapsed 5m 58s (remain 1m 26s) Loss: 0.1830(0.4170) Grad: 7.4448 LR: 0.000091 Epoch: [3][540/650] Data 0.270 (0.268) Elapsed 6m 10s (remain 1m 14s) Loss: 0.4438(0.4147) Grad: 11.7441 LR: 0.000091 Epoch: [3][558/650] Data 0.270 (0.268) Elapsed 6m 22s (remain 1m 2s) Loss: 0.4030(0.4136) Grad: 11.9621 LR: 0.000091 Epoch: [3][576/650] Data 0.258 (0.268) Elapsed 6m 35s (remain 0m 49s) Loss: 0.3255(0.4111) Grad: 9.3649 LR: 0.000091 Epoch: [3][594/650] Data 0.271 (0.268) Elapsed 
6m 47s (remain 0m 37s) Loss: 0.3872(0.4110) Grad: 10.8818 LR: 0.000091 Epoch: [3][612/650] Data 0.271 (0.268) Elapsed 6m 59s (remain 0m 25s) Loss: 0.3804(0.4101) Grad: 11.1846 LR: 0.000091 Epoch: [3][630/650] Data 0.270 (0.268) Elapsed 7m 12s (remain 0m 13s) Loss: 0.7112(0.4090) Grad: 16.2961 LR: 0.000091 Epoch: [3][648/650] Data 0.271 (0.268) Elapsed 7m 24s (remain 0m 0s) Loss: 0.2905(0.4076) Grad: 7.7534 LR: 0.000091 Epoch: [3][649/650] Data 0.273 (0.268) Elapsed 7m 25s (remain 0m 0s) Loss: 0.2781(0.4074) Grad: 7.3812 LR: 0.000091 EVAL: [0/3883] Data 0.601 (0.601) Elapsed 0m 0s (remain 50m 23s) Loss: 0.2910(0.2910) EVAL: [18/3883] Data 0.001 (0.033) Elapsed 0m 3s (remain 13m 20s) Loss: 0.2816(0.1745) EVAL: [36/3883] Data 0.001 (0.017) Elapsed 0m 7s (remain 12m 16s) Loss: 0.1325(0.1669) EVAL: [54/3883] Data 0.001 (0.012) Elapsed 0m 10s (remain 11m 52s) Loss: 0.0291(0.1599) EVAL: [72/3883] Data 0.001 (0.009) Elapsed 0m 13s (remain 11m 38s) Loss: 0.2609(0.1723) EVAL: [90/3883] Data 0.001 (0.008) Elapsed 0m 16s (remain 11m 29s) Loss: 0.3429(0.1755) EVAL: [108/3883] Data 0.001 (0.006) Elapsed 0m 19s (remain 11m 21s) Loss: 0.3869(0.1838) EVAL: [126/3883] Data 0.001 (0.006) Elapsed 0m 22s (remain 11m 15s) Loss: 0.3862(0.1921) EVAL: [144/3883] Data 0.001 (0.005) Elapsed 0m 25s (remain 11m 10s) Loss: 0.1768(0.1918) EVAL: [162/3883] Data 0.001 (0.005) Elapsed 0m 29s (remain 11m 5s) Loss: 0.0688(0.1921) EVAL: [180/3883] Data 0.001 (0.004) Elapsed 0m 32s (remain 11m 0s) Loss: 0.0352(0.1871) EVAL: [198/3883] Data 0.001 (0.004) Elapsed 0m 35s (remain 10m 56s) Loss: 0.1562(0.1884) EVAL: [216/3883] Data 0.001 (0.004) Elapsed 0m 38s (remain 10m 52s) Loss: 0.1298(0.1879) EVAL: [234/3883] Data 0.001 (0.004) Elapsed 0m 41s (remain 10m 48s) Loss: 0.1438(0.1865) EVAL: [252/3883] Data 0.001 (0.003) Elapsed 0m 44s (remain 10m 44s) Loss: 0.2176(0.1825) EVAL: [270/3883] Data 0.001 (0.003) Elapsed 0m 48s (remain 10m 40s) Loss: 0.1207(0.1848) EVAL: [288/3883] Data 0.001 (0.003) Elapsed 0m 
51s (remain 10m 36s) Loss: 0.3173(0.1880) EVAL: [306/3883] Data 0.001 (0.003) Elapsed 0m 54s (remain 10m 33s) Loss: 0.1768(0.1878) EVAL: [324/3883] Data 0.001 (0.003) Elapsed 0m 57s (remain 10m 29s) Loss: 0.1491(0.1880) EVAL: [342/3883] Data 0.002 (0.003) Elapsed 1m 0s (remain 10m 26s) Loss: 0.0209(0.1888) EVAL: [360/3883] Data 0.001 (0.003) Elapsed 1m 3s (remain 10m 22s) Loss: 0.1015(0.1891) EVAL: [378/3883] Data 0.002 (0.003) Elapsed 1m 6s (remain 10m 19s) Loss: 0.1924(0.1896) EVAL: [396/3883] Data 0.001 (0.002) Elapsed 1m 10s (remain 10m 15s) Loss: 0.3370(0.1884) EVAL: [414/3883] Data 0.001 (0.002) Elapsed 1m 13s (remain 10m 12s) Loss: 0.2262(0.1890) EVAL: [432/3883] Data 0.001 (0.002) Elapsed 1m 16s (remain 10m 9s) Loss: 0.1208(0.1891) EVAL: [450/3883] Data 0.001 (0.002) Elapsed 1m 19s (remain 10m 5s) Loss: 0.3187(0.1903) EVAL: [468/3883] Data 0.001 (0.002) Elapsed 1m 22s (remain 10m 2s) Loss: 0.0561(0.1919) EVAL: [486/3883] Data 0.001 (0.002) Elapsed 1m 25s (remain 9m 59s) Loss: 0.2520(0.1913) EVAL: [504/3883] Data 0.001 (0.002) Elapsed 1m 29s (remain 9m 55s) Loss: 0.1054(0.1917) EVAL: [522/3883] Data 0.001 (0.002) Elapsed 1m 32s (remain 9m 52s) Loss: 0.4699(0.1912) EVAL: [540/3883] Data 0.001 (0.002) Elapsed 1m 35s (remain 9m 49s) Loss: 0.1210(0.1903) EVAL: [558/3883] Data 0.001 (0.002) Elapsed 1m 38s (remain 9m 45s) Loss: 0.3499(0.1911) EVAL: [576/3883] Data 0.001 (0.002) Elapsed 1m 41s (remain 9m 42s) Loss: 0.0792(0.1903) EVAL: [594/3883] Data 0.001 (0.002) Elapsed 1m 44s (remain 9m 39s) Loss: 0.1545(0.1892) EVAL: [612/3883] Data 0.001 (0.002) Elapsed 1m 47s (remain 9m 35s) Loss: 0.1606(0.1894) EVAL: [630/3883] Data 0.001 (0.002) Elapsed 1m 51s (remain 9m 32s) Loss: 0.1086(0.1893) EVAL: [648/3883] Data 0.001 (0.002) Elapsed 1m 54s (remain 9m 29s) Loss: 0.0478(0.1894) EVAL: [666/3883] Data 0.001 (0.002) Elapsed 1m 57s (remain 9m 26s) Loss: 0.1153(0.1893) EVAL: [684/3883] Data 0.001 (0.002) Elapsed 2m 0s (remain 9m 22s) Loss: 0.1421(0.1894) EVAL: [702/3883] 
Data 0.001 (0.002) Elapsed 2m 3s (remain 9m 19s) Loss: 0.1611(0.1876) EVAL: [720/3883] Data 0.001 (0.002) Elapsed 2m 6s (remain 9m 16s) Loss: 0.2463(0.1874) EVAL: [738/3883] Data 0.001 (0.002) Elapsed 2m 10s (remain 9m 13s) Loss: 0.0635(0.1874) EVAL: [756/3883] Data 0.001 (0.002) Elapsed 2m 13s (remain 9m 9s) Loss: 0.1968(0.1873) EVAL: [774/3883] Data 0.001 (0.002) Elapsed 2m 16s (remain 9m 6s) Loss: 0.1018(0.1873) EVAL: [792/3883] Data 0.001 (0.002) Elapsed 2m 19s (remain 9m 3s) Loss: 0.2082(0.1881) EVAL: [810/3883] Data 0.001 (0.002) Elapsed 2m 22s (remain 9m 0s) Loss: 0.0407(0.1881) EVAL: [828/3883] Data 0.001 (0.002) Elapsed 2m 25s (remain 8m 57s) Loss: 0.0566(0.1887) EVAL: [846/3883] Data 0.001 (0.002) Elapsed 2m 28s (remain 8m 53s) Loss: 0.0754(0.1893) EVAL: [864/3883] Data 0.001 (0.002) Elapsed 2m 32s (remain 8m 50s) Loss: 0.3035(0.1901) EVAL: [882/3883] Data 0.001 (0.002) Elapsed 2m 35s (remain 8m 47s) Loss: 0.0298(0.1898) EVAL: [900/3883] Data 0.001 (0.002) Elapsed 2m 38s (remain 8m 44s) Loss: 0.2744(0.1896) EVAL: [918/3883] Data 0.001 (0.002) Elapsed 2m 41s (remain 8m 41s) Loss: 0.2739(0.1902) EVAL: [936/3883] Data 0.001 (0.002) Elapsed 2m 44s (remain 8m 37s) Loss: 0.2717(0.1901) EVAL: [954/3883] Data 0.001 (0.002) Elapsed 2m 47s (remain 8m 34s) Loss: 0.1349(0.1895) EVAL: [972/3883] Data 0.001 (0.002) Elapsed 2m 51s (remain 8m 31s) Loss: 0.0522(0.1896) EVAL: [990/3883] Data 0.001 (0.002) Elapsed 2m 54s (remain 8m 28s) Loss: 0.1668(0.1892) EVAL: [1008/3883] Data 0.001 (0.002) Elapsed 2m 57s (remain 8m 25s) Loss: 0.3312(0.1895) EVAL: [1026/3883] Data 0.001 (0.002) Elapsed 3m 0s (remain 8m 21s) Loss: 0.0278(0.1886) EVAL: [1044/3883] Data 0.001 (0.002) Elapsed 3m 3s (remain 8m 18s) Loss: 0.1095(0.1888) EVAL: [1062/3883] Data 0.001 (0.002) Elapsed 3m 6s (remain 8m 15s) Loss: 0.2189(0.1888) EVAL: [1080/3883] Data 0.001 (0.002) Elapsed 3m 9s (remain 8m 12s) Loss: 0.1481(0.1884) EVAL: [1098/3883] Data 0.001 (0.001) Elapsed 3m 13s (remain 8m 9s) Loss: 
0.1550(0.1878) EVAL: [1116/3883] Data 0.001 (0.001) Elapsed 3m 16s (remain 8m 5s) Loss: 0.1831(0.1881) EVAL: [1134/3883] Data 0.001 (0.001) Elapsed 3m 19s (remain 8m 2s) Loss: 0.0593(0.1877) EVAL: [1152/3883] Data 0.001 (0.001) Elapsed 3m 22s (remain 7m 59s) Loss: 0.2231(0.1880) EVAL: [1170/3883] Data 0.001 (0.001) Elapsed 3m 25s (remain 7m 56s) Loss: 0.1820(0.1874) EVAL: [1188/3883] Data 0.001 (0.001) Elapsed 3m 28s (remain 7m 53s) Loss: 0.2113(0.1875) EVAL: [1206/3883] Data 0.001 (0.001) Elapsed 3m 32s (remain 7m 50s) Loss: 0.0854(0.1872) EVAL: [1224/3883] Data 0.001 (0.001) Elapsed 3m 35s (remain 7m 46s) Loss: 0.3458(0.1872) EVAL: [1242/3883] Data 0.001 (0.001) Elapsed 3m 38s (remain 7m 43s) Loss: 0.2700(0.1875) EVAL: [1260/3883] Data 0.001 (0.001) Elapsed 3m 41s (remain 7m 40s) Loss: 0.4275(0.1879) EVAL: [1278/3883] Data 0.001 (0.001) Elapsed 3m 44s (remain 7m 37s) Loss: 0.2915(0.1882) EVAL: [1296/3883] Data 0.001 (0.001) Elapsed 3m 47s (remain 7m 34s) Loss: 0.0369(0.1881) EVAL: [1314/3883] Data 0.001 (0.001) Elapsed 3m 50s (remain 7m 30s) Loss: 0.1637(0.1884) EVAL: [1332/3883] Data 0.001 (0.001) Elapsed 3m 54s (remain 7m 27s) Loss: 0.2470(0.1881) EVAL: [1350/3883] Data 0.001 (0.001) Elapsed 3m 57s (remain 7m 24s) Loss: 0.1018(0.1882) EVAL: [1368/3883] Data 0.001 (0.001) Elapsed 4m 0s (remain 7m 21s) Loss: 0.4251(0.1893) EVAL: [1386/3883] Data 0.001 (0.001) Elapsed 4m 3s (remain 7m 18s) Loss: 0.1210(0.1890) EVAL: [1404/3883] Data 0.001 (0.001) Elapsed 4m 6s (remain 7m 15s) Loss: 0.1756(0.1891) EVAL: [1422/3883] Data 0.002 (0.001) Elapsed 4m 9s (remain 7m 11s) Loss: 0.1631(0.1890) EVAL: [1440/3883] Data 0.001 (0.001) Elapsed 4m 12s (remain 7m 8s) Loss: 0.2113(0.1884) EVAL: [1458/3883] Data 0.001 (0.001) Elapsed 4m 16s (remain 7m 5s) Loss: 0.1262(0.1886) EVAL: [1476/3883] Data 0.001 (0.001) Elapsed 4m 19s (remain 7m 2s) Loss: 0.2412(0.1883) EVAL: [1494/3883] Data 0.001 (0.001) Elapsed 4m 22s (remain 6m 59s) Loss: 0.3796(0.1882) EVAL: [1512/3883] Data 0.001 
(0.001) Elapsed 4m 25s (remain 6m 56s) Loss: 0.4339(0.1882) EVAL: [1530/3883] Data 0.001 (0.001) Elapsed 4m 28s (remain 6m 52s) Loss: 0.2757(0.1882) EVAL: [1548/3883] Data 0.001 (0.001) Elapsed 4m 31s (remain 6m 49s) Loss: 0.2337(0.1880) EVAL: [1566/3883] Data 0.001 (0.001) Elapsed 4m 35s (remain 6m 46s) Loss: 0.2390(0.1877) EVAL: [1584/3883] Data 0.001 (0.001) Elapsed 4m 38s (remain 6m 43s) Loss: 0.0368(0.1878) EVAL: [1602/3883] Data 0.001 (0.001) Elapsed 4m 41s (remain 6m 40s) Loss: 0.2864(0.1881) EVAL: [1620/3883] Data 0.002 (0.001) Elapsed 4m 44s (remain 6m 37s) Loss: 0.3668(0.1889) EVAL: [1638/3883] Data 0.001 (0.001) Elapsed 4m 47s (remain 6m 33s) Loss: 0.2261(0.1891) EVAL: [1656/3883] Data 0.001 (0.001) Elapsed 4m 50s (remain 6m 30s) Loss: 0.1493(0.1892) EVAL: [1674/3883] Data 0.001 (0.001) Elapsed 4m 53s (remain 6m 27s) Loss: 0.2048(0.1894) EVAL: [1692/3883] Data 0.001 (0.001) Elapsed 4m 57s (remain 6m 24s) Loss: 0.1273(0.1892) EVAL: [1710/3883] Data 0.001 (0.001) Elapsed 5m 0s (remain 6m 21s) Loss: 0.1908(0.1898) EVAL: [1728/3883] Data 0.001 (0.001) Elapsed 5m 3s (remain 6m 18s) Loss: 0.1013(0.1901) EVAL: [1746/3883] Data 0.001 (0.001) Elapsed 5m 6s (remain 6m 14s) Loss: 0.1721(0.1903) EVAL: [1764/3883] Data 0.001 (0.001) Elapsed 5m 9s (remain 6m 11s) Loss: 0.1395(0.1906) EVAL: [1782/3883] Data 0.001 (0.001) Elapsed 5m 12s (remain 6m 8s) Loss: 0.3675(0.1906) EVAL: [1800/3883] Data 0.001 (0.001) Elapsed 5m 16s (remain 6m 5s) Loss: 0.2951(0.1905) EVAL: [1818/3883] Data 0.002 (0.001) Elapsed 5m 19s (remain 6m 2s) Loss: 0.0614(0.1905) EVAL: [1836/3883] Data 0.001 (0.001) Elapsed 5m 22s (remain 5m 59s) Loss: 0.1094(0.1907) EVAL: [1854/3883] Data 0.001 (0.001) Elapsed 5m 25s (remain 5m 55s) Loss: 0.0195(0.1906) EVAL: [1872/3883] Data 0.001 (0.001) Elapsed 5m 28s (remain 5m 52s) Loss: 0.3539(0.1907) EVAL: [1890/3883] Data 0.001 (0.001) Elapsed 5m 31s (remain 5m 49s) Loss: 0.1111(0.1907) EVAL: [1908/3883] Data 0.001 (0.001) Elapsed 5m 34s (remain 5m 46s) Loss: 
0.1746(0.1907) EVAL: [1926/3883] Data 0.001 (0.001) Elapsed 5m 38s (remain 5m 43s) Loss: 0.0203(0.1905) EVAL: [1944/3883] Data 0.001 (0.001) Elapsed 5m 41s (remain 5m 40s) Loss: 0.0824(0.1906) EVAL: [1962/3883] Data 0.001 (0.001) Elapsed 5m 44s (remain 5m 36s) Loss: 0.1172(0.1910) EVAL: [1980/3883] Data 0.001 (0.001) Elapsed 5m 47s (remain 5m 33s) Loss: 0.2845(0.1912) EVAL: [1998/3883] Data 0.001 (0.001) Elapsed 5m 50s (remain 5m 30s) Loss: 0.1303(0.1912) EVAL: [2016/3883] Data 0.001 (0.001) Elapsed 5m 53s (remain 5m 27s) Loss: 0.1271(0.1914) EVAL: [2034/3883] Data 0.001 (0.001) Elapsed 5m 57s (remain 5m 24s) Loss: 0.0948(0.1913) EVAL: [2052/3883] Data 0.001 (0.001) Elapsed 6m 0s (remain 5m 21s) Loss: 0.4554(0.1911) EVAL: [2070/3883] Data 0.001 (0.001) Elapsed 6m 3s (remain 5m 17s) Loss: 0.2849(0.1915) EVAL: [2088/3883] Data 0.002 (0.001) Elapsed 6m 6s (remain 5m 14s) Loss: 0.0671(0.1912) EVAL: [2106/3883] Data 0.001 (0.001) Elapsed 6m 9s (remain 5m 11s) Loss: 0.1996(0.1912) EVAL: [2124/3883] Data 0.001 (0.001) Elapsed 6m 12s (remain 5m 8s) Loss: 0.0951(0.1908) EVAL: [2142/3883] Data 0.001 (0.001) Elapsed 6m 15s (remain 5m 5s) Loss: 0.1733(0.1906) EVAL: [2160/3883] Data 0.001 (0.001) Elapsed 6m 19s (remain 5m 2s) Loss: 0.1800(0.1903) EVAL: [2178/3883] Data 0.001 (0.001) Elapsed 6m 22s (remain 4m 58s) Loss: 0.0636(0.1900) EVAL: [2196/3883] Data 0.001 (0.001) Elapsed 6m 25s (remain 4m 55s) Loss: 0.1252(0.1901) EVAL: [2214/3883] Data 0.001 (0.001) Elapsed 6m 28s (remain 4m 52s) Loss: 0.1880(0.1900) EVAL: [2232/3883] Data 0.001 (0.001) Elapsed 6m 31s (remain 4m 49s) Loss: 0.2995(0.1900) EVAL: [2250/3883] Data 0.001 (0.001) Elapsed 6m 34s (remain 4m 46s) Loss: 0.5661(0.1902) EVAL: [2268/3883] Data 0.001 (0.001) Elapsed 6m 38s (remain 4m 43s) Loss: 0.0872(0.1903) EVAL: [2286/3883] Data 0.002 (0.001) Elapsed 6m 41s (remain 4m 39s) Loss: 0.3239(0.1901) EVAL: [2304/3883] Data 0.001 (0.001) Elapsed 6m 44s (remain 4m 36s) Loss: 0.2871(0.1900) EVAL: [2322/3883] Data 0.001 
(0.001) Elapsed 6m 47s (remain 4m 33s) Loss: 0.0610(0.1898) EVAL: [2340/3883] Data 0.001 (0.001) Elapsed 6m 50s (remain 4m 30s) Loss: 0.2039(0.1897) EVAL: [2358/3883] Data 0.001 (0.001) Elapsed 6m 53s (remain 4m 27s) Loss: 0.3252(0.1895) EVAL: [2376/3883] Data 0.001 (0.001) Elapsed 6m 56s (remain 4m 24s) Loss: 0.3446(0.1895) EVAL: [2394/3883] Data 0.001 (0.001) Elapsed 7m 0s (remain 4m 20s) Loss: 0.4761(0.1894) EVAL: [2412/3883] Data 0.001 (0.001) Elapsed 7m 3s (remain 4m 17s) Loss: 0.2868(0.1895) EVAL: [2430/3883] Data 0.001 (0.001) Elapsed 7m 6s (remain 4m 14s) Loss: 0.0533(0.1892) EVAL: [2448/3883] Data 0.001 (0.001) Elapsed 7m 9s (remain 4m 11s) Loss: 0.1333(0.1889) EVAL: [2466/3883] Data 0.001 (0.001) Elapsed 7m 12s (remain 4m 8s) Loss: 0.1463(0.1888) EVAL: [2484/3883] Data 0.001 (0.001) Elapsed 7m 15s (remain 4m 5s) Loss: 0.2548(0.1890) EVAL: [2502/3883] Data 0.001 (0.001) Elapsed 7m 18s (remain 4m 2s) Loss: 0.0860(0.1893) EVAL: [2520/3883] Data 0.001 (0.001) Elapsed 7m 22s (remain 3m 58s) Loss: 0.1616(0.1891) EVAL: [2538/3883] Data 0.001 (0.001) Elapsed 7m 25s (remain 3m 55s) Loss: 0.2004(0.1894) EVAL: [2556/3883] Data 0.001 (0.001) Elapsed 7m 28s (remain 3m 52s) Loss: 0.5147(0.1893) EVAL: [2574/3883] Data 0.001 (0.001) Elapsed 7m 31s (remain 3m 49s) Loss: 0.4913(0.1892) EVAL: [2592/3883] Data 0.001 (0.001) Elapsed 7m 34s (remain 3m 46s) Loss: 0.0614(0.1893) EVAL: [2610/3883] Data 0.001 (0.001) Elapsed 7m 37s (remain 3m 43s) Loss: 0.2861(0.1895) EVAL: [2628/3883] Data 0.001 (0.001) Elapsed 7m 41s (remain 3m 39s) Loss: 0.1018(0.1895) EVAL: [2646/3883] Data 0.001 (0.001) Elapsed 7m 44s (remain 3m 36s) Loss: 0.2592(0.1895) EVAL: [2664/3883] Data 0.001 (0.001) Elapsed 7m 47s (remain 3m 33s) Loss: 0.1314(0.1892) EVAL: [2682/3883] Data 0.001 (0.001) Elapsed 7m 50s (remain 3m 30s) Loss: 0.1332(0.1890) EVAL: [2700/3883] Data 0.001 (0.001) Elapsed 7m 53s (remain 3m 27s) Loss: 0.1461(0.1889) EVAL: [2718/3883] Data 0.001 (0.001) Elapsed 7m 56s (remain 3m 24s) Loss: 
0.3235(0.1888) EVAL: [2736/3883] Data 0.001 (0.001) Elapsed 7m 59s (remain 3m 20s) Loss: 0.0633(0.1889) EVAL: [2754/3883] Data 0.001 (0.001) Elapsed 8m 3s (remain 3m 17s) Loss: 0.1780(0.1893) EVAL: [2772/3883] Data 0.001 (0.001) Elapsed 8m 6s (remain 3m 14s) Loss: 0.2831(0.1895) EVAL: [2790/3883] Data 0.001 (0.001) Elapsed 8m 9s (remain 3m 11s) Loss: 0.0865(0.1894) EVAL: [2808/3883] Data 0.001 (0.001) Elapsed 8m 12s (remain 3m 8s) Loss: 0.3453(0.1894) EVAL: [2826/3883] Data 0.001 (0.001) Elapsed 8m 15s (remain 3m 5s) Loss: 0.2211(0.1892) EVAL: [2844/3883] Data 0.001 (0.001) Elapsed 8m 18s (remain 3m 2s) Loss: 0.2677(0.1892) EVAL: [2862/3883] Data 0.001 (0.001) Elapsed 8m 22s (remain 2m 58s) Loss: 0.3437(0.1892) EVAL: [2880/3883] Data 0.001 (0.001) Elapsed 8m 25s (remain 2m 55s) Loss: 0.0621(0.1890) EVAL: [2898/3883] Data 0.001 (0.001) Elapsed 8m 28s (remain 2m 52s) Loss: 0.3153(0.1890) EVAL: [2916/3883] Data 0.001 (0.001) Elapsed 8m 31s (remain 2m 49s) Loss: 0.2297(0.1890) EVAL: [2934/3883] Data 0.001 (0.001) Elapsed 8m 34s (remain 2m 46s) Loss: 0.3656(0.1888) EVAL: [2952/3883] Data 0.001 (0.001) Elapsed 8m 37s (remain 2m 43s) Loss: 0.0387(0.1889) EVAL: [2970/3883] Data 0.001 (0.001) Elapsed 8m 40s (remain 2m 39s) Loss: 0.1335(0.1889) EVAL: [2988/3883] Data 0.001 (0.001) Elapsed 8m 44s (remain 2m 36s) Loss: 0.1914(0.1890) EVAL: [3006/3883] Data 0.001 (0.001) Elapsed 8m 47s (remain 2m 33s) Loss: 0.1904(0.1888) EVAL: [3024/3883] Data 0.001 (0.001) Elapsed 8m 50s (remain 2m 30s) Loss: 0.3159(0.1888) EVAL: [3042/3883] Data 0.001 (0.001) Elapsed 8m 53s (remain 2m 27s) Loss: 0.1018(0.1886) EVAL: [3060/3883] Data 0.001 (0.001) Elapsed 8m 56s (remain 2m 24s) Loss: 0.2200(0.1884) EVAL: [3078/3883] Data 0.001 (0.001) Elapsed 8m 59s (remain 2m 20s) Loss: 0.3132(0.1884) EVAL: [3096/3883] Data 0.001 (0.001) Elapsed 9m 3s (remain 2m 17s) Loss: 0.5755(0.1884) EVAL: [3114/3883] Data 0.001 (0.001) Elapsed 9m 6s (remain 2m 14s) Loss: 0.2952(0.1885) EVAL: [3132/3883] Data 0.001 
(0.001) Elapsed 9m 9s (remain 2m 11s) Loss: 0.1730(0.1884) EVAL: [3150/3883] Data 0.001 (0.001) Elapsed 9m 12s (remain 2m 8s) Loss: 0.1861(0.1883) EVAL: [3168/3883] Data 0.001 (0.001) Elapsed 9m 15s (remain 2m 5s) Loss: 0.4344(0.1883) EVAL: [3186/3883] Data 0.001 (0.001) Elapsed 9m 18s (remain 2m 2s) Loss: 0.0248(0.1880) EVAL: [3204/3883] Data 0.001 (0.001) Elapsed 9m 21s (remain 1m 58s) Loss: 0.2072(0.1881) EVAL: [3222/3883] Data 0.001 (0.001) Elapsed 9m 25s (remain 1m 55s) Loss: 0.1349(0.1880) EVAL: [3240/3883] Data 0.001 (0.001) Elapsed 9m 28s (remain 1m 52s) Loss: 0.5892(0.1881) EVAL: [3258/3883] Data 0.001 (0.001) Elapsed 9m 31s (remain 1m 49s) Loss: 0.0693(0.1880) EVAL: [3276/3883] Data 0.001 (0.001) Elapsed 9m 34s (remain 1m 46s) Loss: 0.2261(0.1879) EVAL: [3294/3883] Data 0.001 (0.001) Elapsed 9m 37s (remain 1m 43s) Loss: 0.2283(0.1880) EVAL: [3312/3883] Data 0.001 (0.001) Elapsed 9m 40s (remain 1m 39s) Loss: 0.0369(0.1882) EVAL: [3330/3883] Data 0.001 (0.001) Elapsed 9m 43s (remain 1m 36s) Loss: 0.2752(0.1880) EVAL: [3348/3883] Data 0.001 (0.001) Elapsed 9m 47s (remain 1m 33s) Loss: 0.0887(0.1881) EVAL: [3366/3883] Data 0.001 (0.001) Elapsed 9m 50s (remain 1m 30s) Loss: 0.0471(0.1879) EVAL: [3384/3883] Data 0.001 (0.001) Elapsed 9m 53s (remain 1m 27s) Loss: 0.3108(0.1881) EVAL: [3402/3883] Data 0.001 (0.001) Elapsed 9m 56s (remain 1m 24s) Loss: 0.2136(0.1882) EVAL: [3420/3883] Data 0.001 (0.001) Elapsed 9m 59s (remain 1m 20s) Loss: 0.2830(0.1879) EVAL: [3438/3883] Data 0.001 (0.001) Elapsed 10m 2s (remain 1m 17s) Loss: 0.2344(0.1882) EVAL: [3456/3883] Data 0.001 (0.001) Elapsed 10m 6s (remain 1m 14s) Loss: 0.3376(0.1882) EVAL: [3474/3883] Data 0.001 (0.001) Elapsed 10m 9s (remain 1m 11s) Loss: 0.0597(0.1882) EVAL: [3492/3883] Data 0.001 (0.001) Elapsed 10m 12s (remain 1m 8s) Loss: 0.2252(0.1883) EVAL: [3510/3883] Data 0.001 (0.001) Elapsed 10m 15s (remain 1m 5s) Loss: 0.0904(0.1882) EVAL: [3528/3883] Data 0.001 (0.001) Elapsed 10m 18s (remain 1m 2s) Loss: 
0.6235(0.1884) EVAL: [3546/3883] Data 0.001 (0.001) Elapsed 10m 21s (remain 0m 58s) Loss: 0.3725(0.1884) EVAL: [3564/3883] Data 0.001 (0.001) Elapsed 10m 24s (remain 0m 55s) Loss: 0.2265(0.1882) EVAL: [3582/3883] Data 0.001 (0.001) Elapsed 10m 28s (remain 0m 52s) Loss: 0.1061(0.1882) EVAL: [3600/3883] Data 0.001 (0.001) Elapsed 10m 31s (remain 0m 49s) Loss: 0.1099(0.1883) EVAL: [3618/3883] Data 0.001 (0.001) Elapsed 10m 34s (remain 0m 46s) Loss: 0.2045(0.1884) EVAL: [3636/3883] Data 0.001 (0.001) Elapsed 10m 37s (remain 0m 43s) Loss: 0.0505(0.1885) EVAL: [3654/3883] Data 0.001 (0.001) Elapsed 10m 40s (remain 0m 39s) Loss: 0.1724(0.1884) EVAL: [3672/3883] Data 0.002 (0.001) Elapsed 10m 43s (remain 0m 36s) Loss: 0.6321(0.1886) EVAL: [3690/3883] Data 0.001 (0.001) Elapsed 10m 47s (remain 0m 33s) Loss: 0.0539(0.1886) EVAL: [3708/3883] Data 0.001 (0.001) Elapsed 10m 50s (remain 0m 30s) Loss: 0.1829(0.1887) EVAL: [3726/3883] Data 0.001 (0.001) Elapsed 10m 53s (remain 0m 27s) Loss: 0.2587(0.1886) EVAL: [3744/3883] Data 0.002 (0.001) Elapsed 10m 56s (remain 0m 24s) Loss: 0.2037(0.1887) EVAL: [3762/3883] Data 0.001 (0.001) Elapsed 10m 59s (remain 0m 21s) Loss: 0.0769(0.1886) EVAL: [3780/3883] Data 0.001 (0.001) Elapsed 11m 2s (remain 0m 17s) Loss: 0.0346(0.1885) EVAL: [3798/3883] Data 0.001 (0.001) Elapsed 11m 5s (remain 0m 14s) Loss: 0.0372(0.1885) EVAL: [3816/3883] Data 0.001 (0.001) Elapsed 11m 9s (remain 0m 11s) Loss: 0.0599(0.1886) EVAL: [3834/3883] Data 0.001 (0.001) Elapsed 11m 12s (remain 0m 8s) Loss: 0.0874(0.1886) EVAL: [3852/3883] Data 0.002 (0.001) Elapsed 11m 15s (remain 0m 5s) Loss: 0.2689(0.1887) EVAL: [3870/3883] Data 0.001 (0.001) Elapsed 11m 18s (remain 0m 2s) Loss: 0.0731(0.1884)
Epoch 3 - avg_train_loss: 0.4074 avg_val_loss: 0.1884 time: 1126s Epoch 3 - AUC: 0.9572403385534145 - pAUC: 0.1689444909883783 Epoch 3 - Save Best Score: 0.1689 Model
EVAL: [3882/3883] Data 0.001 (0.001) Elapsed 11m 20s (remain 0m 0s) Loss: 0.3771(0.1884) Epoch: [4][0/650] Data 0.686 (0.686) Elapsed 0m 1s (remain 12m 14s) Loss: 0.5161(0.5161) Grad: 14.3878 LR: 0.000080 Epoch: [4][18/650] Data 0.265 (0.285) Elapsed 0m 13s (remain 7m 26s) Loss: 0.2806(0.3602) Grad: 8.6952 LR: 0.000080 Epoch: [4][36/650] Data 0.262 (0.274) Elapsed 0m 25s (remain 7m 6s) Loss: 0.5517(0.3735) Grad: 16.1617 LR: 0.000080 Epoch: [4][54/650] Data 0.271 (0.271) Elapsed 0m 38s (remain 6m 51s) Loss: 0.2280(0.4022) Grad: 8.3559 LR: 0.000080 Epoch: [4][72/650] Data 0.271 (0.270) Elapsed 0m 50s (remain 6m 38s) Loss: 0.2354(0.3928) Grad: 8.8909 LR: 0.000080 Epoch: [4][90/650] Data 0.270 (0.270) Elapsed 1m 2s (remain 6m 24s) Loss: 0.4970(0.3810) Grad: 14.7302 LR: 0.000080 Epoch: [4][108/650] Data 0.272 (0.270) Elapsed 1m 14s (remain 6m 12s) Loss: 0.4437(0.3693) Grad: 11.2275 LR: 0.000080 Epoch: [4][126/650] Data 0.271 (0.269) Elapsed 1m 27s (remain 5m 59s) Loss: 0.5415(0.3690) Grad: 11.1365 LR: 0.000080 Epoch: [4][144/650] Data 0.270 (0.269) Elapsed 1m 39s (remain 5m 46s) Loss: 0.3211(0.3665) Grad: 8.2511 LR: 0.000080 Epoch: [4][162/650] Data 0.273 (0.267) Elapsed 1m 51s (remain 5m 34s) Loss: 0.5143(0.3617) Grad: 14.5998 LR: 0.000080 Epoch: [4][180/650] Data 0.272 (0.267) Elapsed 2m 4s (remain 5m 21s) Loss: 0.2955(0.3700) Grad: 8.8253 LR: 0.000080 Epoch: [4][198/650] Data 0.268 (0.267) Elapsed 2m 16s (remain 5m 9s) Loss: 0.5427(0.3687) Grad: 15.2589 LR: 0.000080 Epoch: [4][216/650] Data 0.272 (0.267) Elapsed 2m 28s (remain 4m 56s) Loss: 0.3833(0.3719) Grad: 10.8625 LR: 0.000080 Epoch: [4][234/650] Data 0.269 (0.267) Elapsed 2m 41s (remain 4m 44s) Loss: 0.1834(0.3736) Grad: 6.2694 LR: 0.000080 Epoch: [4][252/650] Data 0.264 (0.267) Elapsed 2m 53s (remain 4m 32s) Loss: 0.2234(0.3712) Grad: 7.4084 LR: 0.000080 Epoch: [4][270/650] Data 0.263 (0.267) Elapsed 3m 5s (remain 4m 19s) Loss: 0.3614(0.3713) Grad: 9.3921 LR: 0.000080 Epoch: [4][288/650] Data 0.266 (0.267) 
Elapsed 3m 18s (remain 4m 7s) Loss: 0.1921(0.3698) Grad: 5.9696 LR: 0.000080 Epoch: [4][306/650] Data 0.270 (0.267) Elapsed 3m 30s (remain 3m 55s) Loss: 0.3210(0.3723) Grad: 13.2074 LR: 0.000080 Epoch: [4][324/650] Data 0.261 (0.267) Elapsed 3m 42s (remain 3m 42s) Loss: 0.1754(0.3719) Grad: 7.1664 LR: 0.000080 Epoch: [4][342/650] Data 0.272 (0.267) Elapsed 3m 54s (remain 3m 30s) Loss: 0.5245(0.3736) Grad: 13.0820 LR: 0.000080 Epoch: [4][360/650] Data 0.271 (0.267) Elapsed 4m 7s (remain 3m 17s) Loss: 0.3468(0.3727) Grad: 10.1481 LR: 0.000080 Epoch: [4][378/650] Data 0.268 (0.267) Elapsed 4m 19s (remain 3m 5s) Loss: 0.4454(0.3717) Grad: 13.9034 LR: 0.000080 Epoch: [4][396/650] Data 0.259 (0.267) Elapsed 4m 31s (remain 2m 53s) Loss: 0.2754(0.3706) Grad: 6.4313 LR: 0.000080 Epoch: [4][414/650] Data 0.271 (0.267) Elapsed 4m 44s (remain 2m 40s) Loss: 0.6202(0.3708) Grad: 12.9967 LR: 0.000080 Epoch: [4][432/650] Data 0.259 (0.267) Elapsed 4m 56s (remain 2m 28s) Loss: 0.5804(0.3724) Grad: 12.1082 LR: 0.000080 Epoch: [4][450/650] Data 0.271 (0.267) Elapsed 5m 8s (remain 2m 16s) Loss: 0.3864(0.3757) Grad: 9.0371 LR: 0.000080 Epoch: [4][468/650] Data 0.271 (0.267) Elapsed 5m 21s (remain 2m 3s) Loss: 0.2503(0.3775) Grad: 7.8070 LR: 0.000080 Epoch: [4][486/650] Data 0.265 (0.267) Elapsed 5m 33s (remain 1m 51s) Loss: 0.3014(0.3765) Grad: 9.6994 LR: 0.000080 Epoch: [4][504/650] Data 0.269 (0.267) Elapsed 5m 45s (remain 1m 39s) Loss: 0.3121(0.3782) Grad: 11.0597 LR: 0.000080 Epoch: [4][522/650] Data 0.272 (0.267) Elapsed 5m 58s (remain 1m 26s) Loss: 0.1828(0.3775) Grad: 7.3395 LR: 0.000080 Epoch: [4][540/650] Data 0.266 (0.267) Elapsed 6m 10s (remain 1m 14s) Loss: 0.2960(0.3762) Grad: 9.0681 LR: 0.000080 Epoch: [4][558/650] Data 0.272 (0.267) Elapsed 6m 22s (remain 1m 2s) Loss: 0.0975(0.3755) Grad: 4.3532 LR: 0.000080 Epoch: [4][576/650] Data 0.272 (0.267) Elapsed 6m 34s (remain 0m 49s) Loss: 0.1466(0.3741) Grad: 5.3166 LR: 0.000080 Epoch: [4][594/650] Data 0.271 (0.267) Elapsed 
6m 47s (remain 0m 37s) Loss: 0.3465(0.3749) Grad: 10.6198 LR: 0.000080 Epoch: [4][612/650] Data 0.268 (0.267) Elapsed 6m 59s (remain 0m 25s) Loss: 0.1756(0.3742) Grad: 5.4288 LR: 0.000080 Epoch: [4][630/650] Data 0.270 (0.267) Elapsed 7m 11s (remain 0m 13s) Loss: 0.3968(0.3753) Grad: 11.0711 LR: 0.000080 Epoch: [4][648/650] Data 0.272 (0.267) Elapsed 7m 24s (remain 0m 0s) Loss: 0.3950(0.3740) Grad: 9.7122 LR: 0.000080 Epoch: [4][649/650] Data 0.273 (0.267) Elapsed 7m 24s (remain 0m 0s) Loss: 0.2463(0.3738) Grad: 7.9042 LR: 0.000080 EVAL: [0/3883] Data 0.517 (0.517) Elapsed 0m 0s (remain 45m 2s) Loss: 0.1744(0.1744) EVAL: [18/3883] Data 0.001 (0.028) Elapsed 0m 3s (remain 13m 3s) Loss: 0.0904(0.0916) EVAL: [36/3883] Data 0.001 (0.015) Elapsed 0m 7s (remain 12m 7s) Loss: 0.0279(0.0933) EVAL: [54/3883] Data 0.001 (0.010) Elapsed 0m 10s (remain 11m 46s) Loss: 0.0095(0.0953) EVAL: [72/3883] Data 0.001 (0.008) Elapsed 0m 13s (remain 11m 34s) Loss: 0.2201(0.1056) EVAL: [90/3883] Data 0.001 (0.007) Elapsed 0m 16s (remain 11m 25s) Loss: 0.2000(0.1101) EVAL: [108/3883] Data 0.001 (0.006) Elapsed 0m 19s (remain 11m 19s) Loss: 0.2576(0.1140) EVAL: [126/3883] Data 0.001 (0.005) Elapsed 0m 22s (remain 11m 13s) Loss: 0.2517(0.1213) EVAL: [144/3883] Data 0.001 (0.005) Elapsed 0m 25s (remain 11m 7s) Loss: 0.2046(0.1214) EVAL: [162/3883] Data 0.001 (0.004) Elapsed 0m 29s (remain 11m 3s) Loss: 0.0318(0.1208) EVAL: [180/3883] Data 0.001 (0.004) Elapsed 0m 32s (remain 10m 58s) Loss: 0.0070(0.1170) EVAL: [198/3883] Data 0.001 (0.004) Elapsed 0m 35s (remain 10m 54s) Loss: 0.0571(0.1194) EVAL: [216/3883] Data 0.001 (0.003) Elapsed 0m 38s (remain 10m 50s) Loss: 0.0684(0.1189) EVAL: [234/3883] Data 0.001 (0.003) Elapsed 0m 41s (remain 10m 46s) Loss: 0.1119(0.1178) EVAL: [252/3883] Data 0.001 (0.003) Elapsed 0m 44s (remain 10m 42s) Loss: 0.1373(0.1154) EVAL: [270/3883] Data 0.001 (0.003) Elapsed 0m 47s (remain 10m 39s) Loss: 0.0585(0.1177) EVAL: [288/3883] Data 0.001 (0.003) Elapsed 0m 51s 
(remain 10m 35s) Loss: 0.2303(0.1211) EVAL: [306/3883] Data 0.001 (0.003) Elapsed 0m 54s (remain 10m 32s) Loss: 0.0783(0.1208) EVAL: [324/3883] Data 0.001 (0.003) Elapsed 0m 57s (remain 10m 28s) Loss: 0.0541(0.1200) EVAL: [342/3883] Data 0.001 (0.002) Elapsed 1m 0s (remain 10m 25s) Loss: 0.0056(0.1209) EVAL: [360/3883] Data 0.001 (0.002) Elapsed 1m 3s (remain 10m 21s) Loss: 0.0679(0.1211) EVAL: [378/3883] Data 0.001 (0.002) Elapsed 1m 6s (remain 10m 18s) Loss: 0.1329(0.1212) EVAL: [396/3883] Data 0.001 (0.002) Elapsed 1m 10s (remain 10m 14s) Loss: 0.2599(0.1212) EVAL: [414/3883] Data 0.001 (0.002) Elapsed 1m 13s (remain 10m 11s) Loss: 0.1759(0.1217) EVAL: [432/3883] Data 0.001 (0.002) Elapsed 1m 16s (remain 10m 8s) Loss: 0.0501(0.1221) EVAL: [450/3883] Data 0.001 (0.002) Elapsed 1m 19s (remain 10m 4s) Loss: 0.2530(0.1221) EVAL: [468/3883] Data 0.001 (0.002) Elapsed 1m 22s (remain 10m 1s) Loss: 0.0374(0.1230) EVAL: [486/3883] Data 0.001 (0.002) Elapsed 1m 25s (remain 9m 58s) Loss: 0.1687(0.1221) EVAL: [504/3883] Data 0.001 (0.002) Elapsed 1m 28s (remain 9m 54s) Loss: 0.0701(0.1222) EVAL: [522/3883] Data 0.001 (0.002) Elapsed 1m 32s (remain 9m 51s) Loss: 0.4162(0.1222) EVAL: [540/3883] Data 0.001 (0.002) Elapsed 1m 35s (remain 9m 48s) Loss: 0.0462(0.1210) EVAL: [558/3883] Data 0.001 (0.002) Elapsed 1m 38s (remain 9m 45s) Loss: 0.2565(0.1218) EVAL: [576/3883] Data 0.001 (0.002) Elapsed 1m 41s (remain 9m 41s) Loss: 0.0497(0.1209) EVAL: [594/3883] Data 0.001 (0.002) Elapsed 1m 44s (remain 9m 38s) Loss: 0.0988(0.1204) EVAL: [612/3883] Data 0.001 (0.002) Elapsed 1m 47s (remain 9m 35s) Loss: 0.1239(0.1201) EVAL: [630/3883] Data 0.001 (0.002) Elapsed 1m 51s (remain 9m 32s) Loss: 0.0357(0.1200) EVAL: [648/3883] Data 0.001 (0.002) Elapsed 1m 54s (remain 9m 28s) Loss: 0.0214(0.1204) EVAL: [666/3883] Data 0.001 (0.002) Elapsed 1m 57s (remain 9m 25s) Loss: 0.1063(0.1206) EVAL: [684/3883] Data 0.001 (0.002) Elapsed 2m 0s (remain 9m 22s) Loss: 0.1424(0.1210) EVAL: [702/3883] Data 
0.001 (0.002) Elapsed 2m 3s (remain 9m 19s) Loss: 0.1193(0.1198) EVAL: [720/3883] Data 0.001 (0.002) Elapsed 2m 6s (remain 9m 15s) Loss: 0.1081(0.1192) EVAL: [738/3883] Data 0.001 (0.002) Elapsed 2m 9s (remain 9m 12s) Loss: 0.0191(0.1193) EVAL: [756/3883] Data 0.001 (0.002) Elapsed 2m 13s (remain 9m 9s) Loss: 0.1421(0.1194) EVAL: [774/3883] Data 0.001 (0.002) Elapsed 2m 16s (remain 9m 6s) Loss: 0.0749(0.1194) EVAL: [792/3883] Data 0.001 (0.002) Elapsed 2m 19s (remain 9m 2s) Loss: 0.1189(0.1199) EVAL: [810/3883] Data 0.001 (0.002) Elapsed 2m 22s (remain 8m 59s) Loss: 0.0176(0.1199) EVAL: [828/3883] Data 0.001 (0.002) Elapsed 2m 25s (remain 8m 56s) Loss: 0.0231(0.1201) EVAL: [846/3883] Data 0.001 (0.002) Elapsed 2m 28s (remain 8m 53s) Loss: 0.0293(0.1204) EVAL: [864/3883] Data 0.002 (0.002) Elapsed 2m 31s (remain 8m 50s) Loss: 0.2445(0.1211) EVAL: [882/3883] Data 0.001 (0.002) Elapsed 2m 35s (remain 8m 46s) Loss: 0.0061(0.1211) EVAL: [900/3883] Data 0.001 (0.002) Elapsed 2m 38s (remain 8m 43s) Loss: 0.2322(0.1210) EVAL: [918/3883] Data 0.001 (0.002) Elapsed 2m 41s (remain 8m 40s) Loss: 0.1425(0.1212) EVAL: [936/3883] Data 0.001 (0.002) Elapsed 2m 44s (remain 8m 37s) Loss: 0.1735(0.1212) EVAL: [954/3883] Data 0.001 (0.001) Elapsed 2m 47s (remain 8m 34s) Loss: 0.0570(0.1207) EVAL: [972/3883] Data 0.001 (0.001) Elapsed 2m 50s (remain 8m 31s) Loss: 0.0210(0.1210) EVAL: [990/3883] Data 0.001 (0.001) Elapsed 2m 54s (remain 8m 27s) Loss: 0.1130(0.1207) EVAL: [1008/3883] Data 0.001 (0.001) Elapsed 2m 57s (remain 8m 24s) Loss: 0.2202(0.1210) EVAL: [1026/3883] Data 0.001 (0.001) Elapsed 3m 0s (remain 8m 21s) Loss: 0.0312(0.1204) EVAL: [1044/3883] Data 0.001 (0.001) Elapsed 3m 3s (remain 8m 18s) Loss: 0.0473(0.1204) EVAL: [1062/3883] Data 0.001 (0.001) Elapsed 3m 6s (remain 8m 15s) Loss: 0.0818(0.1206) EVAL: [1080/3883] Data 0.001 (0.001) Elapsed 3m 9s (remain 8m 11s) Loss: 0.0937(0.1205) EVAL: [1098/3883] Data 0.001 (0.001) Elapsed 3m 12s (remain 8m 8s) Loss: 0.0801(0.1199) 
EVAL: [1116/3883] Data 0.001 (0.001) Elapsed 3m 16s (remain 8m 5s) Loss: 0.1384(0.1201) EVAL: [1134/3883] Data 0.001 (0.001) Elapsed 3m 19s (remain 8m 2s) Loss: 0.0367(0.1196) EVAL: [1152/3883] Data 0.001 (0.001) Elapsed 3m 22s (remain 7m 59s) Loss: 0.1314(0.1197) EVAL: [1170/3883] Data 0.001 (0.001) Elapsed 3m 25s (remain 7m 55s) Loss: 0.0794(0.1192) EVAL: [1188/3883] Data 0.001 (0.001) Elapsed 3m 28s (remain 7m 52s) Loss: 0.0985(0.1192) EVAL: [1206/3883] Data 0.001 (0.001) Elapsed 3m 31s (remain 7m 49s) Loss: 0.0231(0.1190) EVAL: [1224/3883] Data 0.001 (0.001) Elapsed 3m 34s (remain 7m 46s) Loss: 0.2084(0.1190) EVAL: [1242/3883] Data 0.001 (0.001) Elapsed 3m 38s (remain 7m 43s) Loss: 0.1423(0.1189) EVAL: [1260/3883] Data 0.001 (0.001) Elapsed 3m 41s (remain 7m 40s) Loss: 0.3262(0.1191) EVAL: [1278/3883] Data 0.001 (0.001) Elapsed 3m 44s (remain 7m 36s) Loss: 0.2441(0.1194) EVAL: [1296/3883] Data 0.001 (0.001) Elapsed 3m 47s (remain 7m 33s) Loss: 0.0267(0.1195) EVAL: [1314/3883] Data 0.001 (0.001) Elapsed 3m 50s (remain 7m 30s) Loss: 0.0851(0.1198) EVAL: [1332/3883] Data 0.002 (0.001) Elapsed 3m 53s (remain 7m 27s) Loss: 0.0855(0.1193) EVAL: [1350/3883] Data 0.001 (0.001) Elapsed 3m 57s (remain 7m 24s) Loss: 0.0145(0.1192) EVAL: [1368/3883] Data 0.001 (0.001) Elapsed 4m 0s (remain 7m 21s) Loss: 0.3208(0.1198) EVAL: [1386/3883] Data 0.001 (0.001) Elapsed 4m 3s (remain 7m 17s) Loss: 0.0439(0.1195) EVAL: [1404/3883] Data 0.001 (0.001) Elapsed 4m 6s (remain 7m 14s) Loss: 0.0580(0.1196) EVAL: [1422/3883] Data 0.001 (0.001) Elapsed 4m 9s (remain 7m 11s) Loss: 0.1370(0.1196) EVAL: [1440/3883] Data 0.001 (0.001) Elapsed 4m 12s (remain 7m 8s) Loss: 0.0706(0.1191) EVAL: [1458/3883] Data 0.001 (0.001) Elapsed 4m 15s (remain 7m 5s) Loss: 0.0640(0.1192) EVAL: [1476/3883] Data 0.001 (0.001) Elapsed 4m 19s (remain 7m 2s) Loss: 0.0973(0.1190) EVAL: [1494/3883] Data 0.001 (0.001) Elapsed 4m 22s (remain 6m 58s) Loss: 0.2943(0.1188) EVAL: [1512/3883] Data 0.001 (0.001) Elapsed 4m 
25s (remain 6m 55s) Loss: 0.3109(0.1187) EVAL: [1530/3883] Data 0.001 (0.001) Elapsed 4m 28s (remain 6m 52s) Loss: 0.2503(0.1188) EVAL: [1548/3883] Data 0.001 (0.001) Elapsed 4m 31s (remain 6m 49s) Loss: 0.1211(0.1185) EVAL: [1566/3883] Data 0.001 (0.001) Elapsed 4m 34s (remain 6m 46s) Loss: 0.1949(0.1182) EVAL: [1584/3883] Data 0.001 (0.001) Elapsed 4m 37s (remain 6m 43s) Loss: 0.0171(0.1182) EVAL: [1602/3883] Data 0.001 (0.001) Elapsed 4m 41s (remain 6m 39s) Loss: 0.2507(0.1186) EVAL: [1620/3883] Data 0.001 (0.001) Elapsed 4m 44s (remain 6m 36s) Loss: 0.2390(0.1192) EVAL: [1638/3883] Data 0.001 (0.001) Elapsed 4m 47s (remain 6m 33s) Loss: 0.1125(0.1194) EVAL: [1656/3883] Data 0.001 (0.001) Elapsed 4m 50s (remain 6m 30s) Loss: 0.1062(0.1194) EVAL: [1674/3883] Data 0.001 (0.001) Elapsed 4m 53s (remain 6m 27s) Loss: 0.1613(0.1197) EVAL: [1692/3883] Data 0.001 (0.001) Elapsed 4m 56s (remain 6m 24s) Loss: 0.0368(0.1196) EVAL: [1710/3883] Data 0.001 (0.001) Elapsed 5m 0s (remain 6m 20s) Loss: 0.0915(0.1200) EVAL: [1728/3883] Data 0.001 (0.001) Elapsed 5m 3s (remain 6m 17s) Loss: 0.0432(0.1201) EVAL: [1746/3883] Data 0.001 (0.001) Elapsed 5m 6s (remain 6m 14s) Loss: 0.0965(0.1201) EVAL: [1764/3883] Data 0.001 (0.001) Elapsed 5m 9s (remain 6m 11s) Loss: 0.0704(0.1204) EVAL: [1782/3883] Data 0.001 (0.001) Elapsed 5m 12s (remain 6m 8s) Loss: 0.3704(0.1205) EVAL: [1800/3883] Data 0.001 (0.001) Elapsed 5m 15s (remain 6m 5s) Loss: 0.2139(0.1205) EVAL: [1818/3883] Data 0.001 (0.001) Elapsed 5m 18s (remain 6m 1s) Loss: 0.0328(0.1205) EVAL: [1836/3883] Data 0.001 (0.001) Elapsed 5m 22s (remain 5m 58s) Loss: 0.0566(0.1207) EVAL: [1854/3883] Data 0.001 (0.001) Elapsed 5m 25s (remain 5m 55s) Loss: 0.0041(0.1206) EVAL: [1872/3883] Data 0.001 (0.001) Elapsed 5m 28s (remain 5m 52s) Loss: 0.2407(0.1207) EVAL: [1890/3883] Data 0.001 (0.001) Elapsed 5m 31s (remain 5m 49s) Loss: 0.0307(0.1208) EVAL: [1908/3883] Data 0.001 (0.001) Elapsed 5m 34s (remain 5m 46s) Loss: 0.1211(0.1208) EVAL: 
[1926/3883] Data 0.001 (0.001) Elapsed 5m 37s (remain 5m 42s) Loss: 0.0077(0.1206) EVAL: [1944/3883] Data 0.001 (0.001) Elapsed 5m 40s (remain 5m 39s) Loss: 0.0348(0.1207) EVAL: [1962/3883] Data 0.001 (0.001) Elapsed 5m 44s (remain 5m 36s) Loss: 0.0679(0.1210) EVAL: [1980/3883] Data 0.001 (0.001) Elapsed 5m 47s (remain 5m 33s) Loss: 0.1704(0.1210) EVAL: [1998/3883] Data 0.001 (0.001) Elapsed 5m 50s (remain 5m 30s) Loss: 0.1392(0.1211) EVAL: [2016/3883] Data 0.001 (0.001) Elapsed 5m 53s (remain 5m 27s) Loss: 0.0269(0.1213) EVAL: [2034/3883] Data 0.001 (0.001) Elapsed 5m 56s (remain 5m 23s) Loss: 0.0545(0.1212) EVAL: [2052/3883] Data 0.001 (0.001) Elapsed 5m 59s (remain 5m 20s) Loss: 0.3110(0.1213) EVAL: [2070/3883] Data 0.001 (0.001) Elapsed 6m 3s (remain 5m 17s) Loss: 0.1791(0.1215) EVAL: [2088/3883] Data 0.001 (0.001) Elapsed 6m 6s (remain 5m 14s) Loss: 0.0198(0.1214) EVAL: [2106/3883] Data 0.001 (0.001) Elapsed 6m 9s (remain 5m 11s) Loss: 0.1074(0.1214) EVAL: [2124/3883] Data 0.001 (0.001) Elapsed 6m 12s (remain 5m 8s) Loss: 0.0632(0.1210) EVAL: [2142/3883] Data 0.001 (0.001) Elapsed 6m 15s (remain 5m 4s) Loss: 0.0730(0.1210) EVAL: [2160/3883] Data 0.001 (0.001) Elapsed 6m 18s (remain 5m 1s) Loss: 0.1044(0.1207) EVAL: [2178/3883] Data 0.001 (0.001) Elapsed 6m 21s (remain 4m 58s) Loss: 0.0148(0.1204) EVAL: [2196/3883] Data 0.002 (0.001) Elapsed 6m 25s (remain 4m 55s) Loss: 0.0376(0.1204) EVAL: [2214/3883] Data 0.001 (0.001) Elapsed 6m 28s (remain 4m 52s) Loss: 0.1374(0.1204) EVAL: [2232/3883] Data 0.001 (0.001) Elapsed 6m 31s (remain 4m 49s) Loss: 0.2171(0.1205) EVAL: [2250/3883] Data 0.001 (0.001) Elapsed 6m 34s (remain 4m 46s) Loss: 0.4115(0.1207) EVAL: [2268/3883] Data 0.001 (0.001) Elapsed 6m 37s (remain 4m 42s) Loss: 0.0834(0.1208) EVAL: [2286/3883] Data 0.001 (0.001) Elapsed 6m 40s (remain 4m 39s) Loss: 0.1775(0.1207) EVAL: [2304/3883] Data 0.001 (0.001) Elapsed 6m 44s (remain 4m 36s) Loss: 0.1654(0.1205) EVAL: [2322/3883] Data 0.001 (0.001) Elapsed 6m 47s 
(remain 4m 33s) Loss: 0.0457(0.1205) EVAL: [2340/3883] Data 0.001 (0.001) Elapsed 6m 50s (remain 4m 30s) Loss: 0.1289(0.1205) EVAL: [2358/3883] Data 0.001 (0.001) Elapsed 6m 53s (remain 4m 27s) Loss: 0.1745(0.1203) EVAL: [2376/3883] Data 0.001 (0.001) Elapsed 6m 56s (remain 4m 23s) Loss: 0.2559(0.1205) EVAL: [2394/3883] Data 0.001 (0.001) Elapsed 6m 59s (remain 4m 20s) Loss: 0.3498(0.1203) EVAL: [2412/3883] Data 0.001 (0.001) Elapsed 7m 2s (remain 4m 17s) Loss: 0.1721(0.1203) EVAL: [2430/3883] Data 0.002 (0.001) Elapsed 7m 6s (remain 4m 14s) Loss: 0.0130(0.1201) EVAL: [2448/3883] Data 0.001 (0.001) Elapsed 7m 9s (remain 4m 11s) Loss: 0.0988(0.1198) EVAL: [2466/3883] Data 0.001 (0.001) Elapsed 7m 12s (remain 4m 8s) Loss: 0.0732(0.1197) EVAL: [2484/3883] Data 0.001 (0.001) Elapsed 7m 15s (remain 4m 5s) Loss: 0.2352(0.1200) EVAL: [2502/3883] Data 0.001 (0.001) Elapsed 7m 18s (remain 4m 1s) Loss: 0.0292(0.1201) EVAL: [2520/3883] Data 0.001 (0.001) Elapsed 7m 21s (remain 3m 58s) Loss: 0.0827(0.1201) EVAL: [2538/3883] Data 0.001 (0.001) Elapsed 7m 24s (remain 3m 55s) Loss: 0.0991(0.1204) EVAL: [2556/3883] Data 0.001 (0.001) Elapsed 7m 28s (remain 3m 52s) Loss: 0.4510(0.1204) EVAL: [2574/3883] Data 0.001 (0.001) Elapsed 7m 31s (remain 3m 49s) Loss: 0.4533(0.1204) EVAL: [2592/3883] Data 0.001 (0.001) Elapsed 7m 34s (remain 3m 46s) Loss: 0.0364(0.1205) EVAL: [2610/3883] Data 0.001 (0.001) Elapsed 7m 37s (remain 3m 42s) Loss: 0.1393(0.1207) EVAL: [2628/3883] Data 0.001 (0.001) Elapsed 7m 40s (remain 3m 39s) Loss: 0.0252(0.1208) EVAL: [2646/3883] Data 0.001 (0.001) Elapsed 7m 43s (remain 3m 36s) Loss: 0.1561(0.1206) EVAL: [2664/3883] Data 0.001 (0.001) Elapsed 7m 47s (remain 3m 33s) Loss: 0.0534(0.1206) EVAL: [2682/3883] Data 0.001 (0.001) Elapsed 7m 50s (remain 3m 30s) Loss: 0.1192(0.1205) EVAL: [2700/3883] Data 0.001 (0.001) Elapsed 7m 53s (remain 3m 27s) Loss: 0.0751(0.1204) EVAL: [2718/3883] Data 0.001 (0.001) Elapsed 7m 56s (remain 3m 23s) Loss: 0.1639(0.1204) EVAL: 
[2736/3883] Data 0.001 (0.001) Elapsed 7m 59s (remain 3m 20s) Loss: 0.0222(0.1205) EVAL: [2754/3883] Data 0.001 (0.001) Elapsed 8m 2s (remain 3m 17s) Loss: 0.0977(0.1209) EVAL: [2772/3883] Data 0.001 (0.001) Elapsed 8m 5s (remain 3m 14s) Loss: 0.1908(0.1209) EVAL: [2790/3883] Data 0.002 (0.001) Elapsed 8m 9s (remain 3m 11s) Loss: 0.0522(0.1208) EVAL: [2808/3883] Data 0.001 (0.001) Elapsed 8m 12s (remain 3m 8s) Loss: 0.1483(0.1209) EVAL: [2826/3883] Data 0.001 (0.001) Elapsed 8m 15s (remain 3m 5s) Loss: 0.0817(0.1207) EVAL: [2844/3883] Data 0.001 (0.001) Elapsed 8m 18s (remain 3m 1s) Loss: 0.1356(0.1207) EVAL: [2862/3883] Data 0.001 (0.001) Elapsed 8m 21s (remain 2m 58s) Loss: 0.3131(0.1208) EVAL: [2880/3883] Data 0.001 (0.001) Elapsed 8m 24s (remain 2m 55s) Loss: 0.0131(0.1205) EVAL: [2898/3883] Data 0.001 (0.001) Elapsed 8m 27s (remain 2m 52s) Loss: 0.2349(0.1205) EVAL: [2916/3883] Data 0.001 (0.001) Elapsed 8m 31s (remain 2m 49s) Loss: 0.1541(0.1205) EVAL: [2934/3883] Data 0.001 (0.001) Elapsed 8m 34s (remain 2m 46s) Loss: 0.2375(0.1204) EVAL: [2952/3883] Data 0.001 (0.001) Elapsed 8m 37s (remain 2m 42s) Loss: 0.0162(0.1205) EVAL: [2970/3883] Data 0.001 (0.001) Elapsed 8m 40s (remain 2m 39s) Loss: 0.0534(0.1204) EVAL: [2988/3883] Data 0.001 (0.001) Elapsed 8m 43s (remain 2m 36s) Loss: 0.0924(0.1205) EVAL: [3006/3883] Data 0.001 (0.001) Elapsed 8m 46s (remain 2m 33s) Loss: 0.1318(0.1204) EVAL: [3024/3883] Data 0.001 (0.001) Elapsed 8m 50s (remain 2m 30s) Loss: 0.2042(0.1204) EVAL: [3042/3883] Data 0.001 (0.001) Elapsed 8m 53s (remain 2m 27s) Loss: 0.0462(0.1202) EVAL: [3060/3883] Data 0.001 (0.001) Elapsed 8m 56s (remain 2m 24s) Loss: 0.1251(0.1202) EVAL: [3078/3883] Data 0.001 (0.001) Elapsed 8m 59s (remain 2m 20s) Loss: 0.1728(0.1202) EVAL: [3096/3883] Data 0.001 (0.001) Elapsed 9m 2s (remain 2m 17s) Loss: 0.5085(0.1202) EVAL: [3114/3883] Data 0.001 (0.001) Elapsed 9m 5s (remain 2m 14s) Loss: 0.1523(0.1203) EVAL: [3132/3883] Data 0.001 (0.001) Elapsed 9m 8s 
(remain 2m 11s) Loss: 0.1005(0.1202) EVAL: [3150/3883] Data 0.001 (0.001) Elapsed 9m 12s (remain 2m 8s) Loss: 0.0726(0.1200) EVAL: [3168/3883] Data 0.001 (0.001) Elapsed 9m 15s (remain 2m 5s) Loss: 0.3385(0.1200) EVAL: [3186/3883] Data 0.001 (0.001) Elapsed 9m 18s (remain 2m 1s) Loss: 0.0068(0.1198) EVAL: [3204/3883] Data 0.001 (0.001) Elapsed 9m 21s (remain 1m 58s) Loss: 0.0890(0.1198) EVAL: [3222/3883] Data 0.002 (0.001) Elapsed 9m 24s (remain 1m 55s) Loss: 0.1349(0.1198) EVAL: [3240/3883] Data 0.001 (0.001) Elapsed 9m 27s (remain 1m 52s) Loss: 0.4512(0.1199) EVAL: [3258/3883] Data 0.001 (0.001) Elapsed 9m 30s (remain 1m 49s) Loss: 0.0236(0.1199) EVAL: [3276/3883] Data 0.001 (0.001) Elapsed 9m 34s (remain 1m 46s) Loss: 0.1464(0.1198) EVAL: [3294/3883] Data 0.001 (0.001) Elapsed 9m 37s (remain 1m 43s) Loss: 0.1414(0.1199) EVAL: [3312/3883] Data 0.001 (0.001) Elapsed 9m 40s (remain 1m 39s) Loss: 0.0097(0.1200) EVAL: [3330/3883] Data 0.001 (0.001) Elapsed 9m 43s (remain 1m 36s) Loss: 0.1940(0.1198) EVAL: [3348/3883] Data 0.001 (0.001) Elapsed 9m 46s (remain 1m 33s) Loss: 0.0519(0.1198) EVAL: [3366/3883] Data 0.001 (0.001) Elapsed 9m 49s (remain 1m 30s) Loss: 0.0451(0.1198) EVAL: [3384/3883] Data 0.001 (0.001) Elapsed 9m 53s (remain 1m 27s) Loss: 0.2096(0.1199) EVAL: [3402/3883] Data 0.001 (0.001) Elapsed 9m 56s (remain 1m 24s) Loss: 0.1446(0.1200) EVAL: [3420/3883] Data 0.001 (0.001) Elapsed 9m 59s (remain 1m 20s) Loss: 0.2546(0.1199) EVAL: [3438/3883] Data 0.001 (0.001) Elapsed 10m 2s (remain 1m 17s) Loss: 0.1266(0.1201) EVAL: [3456/3883] Data 0.001 (0.001) Elapsed 10m 5s (remain 1m 14s) Loss: 0.2522(0.1201) EVAL: [3474/3883] Data 0.001 (0.001) Elapsed 10m 8s (remain 1m 11s) Loss: 0.0237(0.1202) EVAL: [3492/3883] Data 0.001 (0.001) Elapsed 10m 11s (remain 1m 8s) Loss: 0.1047(0.1202) EVAL: [3510/3883] Data 0.001 (0.001) Elapsed 10m 15s (remain 1m 5s) Loss: 0.0759(0.1201) EVAL: [3528/3883] Data 0.001 (0.001) Elapsed 10m 18s (remain 1m 2s) Loss: 0.5256(0.1203) EVAL: 
[3546/3883] Data 0.001 (0.001) Elapsed 10m 21s (remain 0m 58s) Loss: 0.2270(0.1204) EVAL: [3564/3883] Data 0.001 (0.001) Elapsed 10m 24s (remain 0m 55s) Loss: 0.2062(0.1201) EVAL: [3582/3883] Data 0.001 (0.001) Elapsed 10m 27s (remain 0m 52s) Loss: 0.0461(0.1201) EVAL: [3600/3883] Data 0.001 (0.001) Elapsed 10m 30s (remain 0m 49s) Loss: 0.0664(0.1202) EVAL: [3618/3883] Data 0.001 (0.001) Elapsed 10m 33s (remain 0m 46s) Loss: 0.1970(0.1202) EVAL: [3636/3883] Data 0.001 (0.001) Elapsed 10m 37s (remain 0m 43s) Loss: 0.0520(0.1203) EVAL: [3654/3883] Data 0.001 (0.001) Elapsed 10m 40s (remain 0m 39s) Loss: 0.0692(0.1202) EVAL: [3672/3883] Data 0.001 (0.001) Elapsed 10m 43s (remain 0m 36s) Loss: 0.4023(0.1203) EVAL: [3690/3883] Data 0.001 (0.001) Elapsed 10m 46s (remain 0m 33s) Loss: 0.0137(0.1203) EVAL: [3708/3883] Data 0.001 (0.001) Elapsed 10m 49s (remain 0m 30s) Loss: 0.0711(0.1203) EVAL: [3726/3883] Data 0.001 (0.001) Elapsed 10m 52s (remain 0m 27s) Loss: 0.1352(0.1202) EVAL: [3744/3883] Data 0.001 (0.001) Elapsed 10m 56s (remain 0m 24s) Loss: 0.1439(0.1203) EVAL: [3762/3883] Data 0.001 (0.001) Elapsed 10m 59s (remain 0m 21s) Loss: 0.0819(0.1202) EVAL: [3780/3883] Data 0.001 (0.001) Elapsed 11m 2s (remain 0m 17s) Loss: 0.0124(0.1202) EVAL: [3798/3883] Data 0.001 (0.001) Elapsed 11m 5s (remain 0m 14s) Loss: 0.0090(0.1202) EVAL: [3816/3883] Data 0.001 (0.001) Elapsed 11m 8s (remain 0m 11s) Loss: 0.0227(0.1202) EVAL: [3834/3883] Data 0.001 (0.001) Elapsed 11m 11s (remain 0m 8s) Loss: 0.0326(0.1202) EVAL: [3852/3883] Data 0.001 (0.001) Elapsed 11m 14s (remain 0m 5s) Loss: 0.2164(0.1203) EVAL: [3870/3883] Data 0.001 (0.001) Elapsed 11m 18s (remain 0m 2s) Loss: 0.0509(0.1201)
Epoch 4 - avg_train_loss: 0.3738 avg_val_loss: 0.1201 time: 1125s Epoch 4 - AUC: 0.9644866920152091 - pAUC: 0.17463384245236402 Epoch 4 - Save Best Score: 0.1746 Model
EVAL: [3882/3883] Data 0.001 (0.001) Elapsed 11m 20s (remain 0m 0s) Loss: 0.2134(0.1201) Epoch: [5][0/650] Data 0.717 (0.717) Elapsed 0m 1s (remain 12m 37s) Loss: 0.1759(0.1759) Grad: 5.9223 LR: 0.000066 Epoch: [5][18/650] Data 0.258 (0.289) Elapsed 0m 13s (remain 7m 27s) Loss: 0.1697(0.2975) Grad: 5.3467 LR: 0.000066 Epoch: [5][36/650] Data 0.272 (0.280) Elapsed 0m 25s (remain 7m 7s) Loss: 0.4989(0.3398) Grad: 17.8310 LR: 0.000066 Epoch: [5][54/650] Data 0.267 (0.274) Elapsed 0m 38s (remain 6m 51s) Loss: 0.4198(0.3398) Grad: 9.3448 LR: 0.000066 Epoch: [5][72/650] Data 0.268 (0.272) Elapsed 0m 50s (remain 6m 38s) Loss: 0.1747(0.3416) Grad: 5.6316 LR: 0.000066 Epoch: [5][90/650] Data 0.258 (0.271) Elapsed 1m 2s (remain 6m 25s) Loss: 0.7529(0.3419) Grad: 13.8368 LR: 0.000066 Epoch: [5][108/650] Data 0.270 (0.270) Elapsed 1m 14s (remain 6m 12s) Loss: 0.3285(0.3356) Grad: 8.2665 LR: 0.000066 Epoch: [5][126/650] Data 0.268 (0.270) Elapsed 1m 27s (remain 5m 59s) Loss: 0.4040(0.3349) Grad: 7.7235 LR: 0.000066 Epoch: [5][144/650] Data 0.267 (0.270) Elapsed 1m 39s (remain 5m 46s) Loss: 0.4024(0.3342) Grad: 9.2124 LR: 0.000066 Epoch: [5][162/650] Data 0.271 (0.270) Elapsed 1m 51s (remain 5m 34s) Loss: 0.2935(0.3355) Grad: 9.3955 LR: 0.000066 Epoch: [5][180/650] Data 0.257 (0.269) Elapsed 2m 4s (remain 5m 21s) Loss: 0.3530(0.3429) Grad: 10.1772 LR: 0.000066 Epoch: [5][198/650] Data 0.272 (0.269) Elapsed 2m 16s (remain 5m 9s) Loss: 0.4249(0.3399) Grad: 13.0205 LR: 0.000066 Epoch: [5][216/650] Data 0.270 (0.269) Elapsed 2m 28s (remain 4m 56s) Loss: 0.3282(0.3373) Grad: 8.8688 LR: 0.000066 Epoch: [5][234/650] Data 0.270 (0.268) Elapsed 2m 41s (remain 4m 44s) Loss: 0.5423(0.3415) Grad: 10.8492 LR: 0.000066 Epoch: [5][252/650] Data 0.271 (0.268) Elapsed 2m 53s (remain 4m 32s) Loss: 0.2552(0.3411) Grad: 7.4406 LR: 0.000066 Epoch: [5][270/650] Data 0.270 (0.268) Elapsed 3m 5s (remain 4m 19s) Loss: 0.2963(0.3379) Grad: 10.1796 LR: 0.000066 Epoch: [5][288/650] Data 0.271 (0.268) 
Elapsed 3m 18s (remain 4m 7s) Loss: 0.2279(0.3368) Grad: 7.5042 LR: 0.000066 Epoch: [5][306/650] Data 0.273 (0.268) Elapsed 3m 30s (remain 3m 55s) Loss: 0.4556(0.3375) Grad: 10.9485 LR: 0.000066 Epoch: [5][324/650] Data 0.267 (0.268) Elapsed 3m 42s (remain 3m 42s) Loss: 0.1420(0.3360) Grad: 8.8681 LR: 0.000066 Epoch: [5][342/650] Data 0.270 (0.268) Elapsed 3m 55s (remain 3m 30s) Loss: 0.1718(0.3342) Grad: 5.5009 LR: 0.000066 Epoch: [5][360/650] Data 0.262 (0.268) Elapsed 4m 7s (remain 3m 18s) Loss: 0.3163(0.3360) Grad: 12.0073 LR: 0.000066 Epoch: [5][378/650] Data 0.272 (0.268) Elapsed 4m 19s (remain 3m 5s) Loss: 0.5190(0.3367) Grad: 15.7462 LR: 0.000066 Epoch: [5][396/650] Data 0.260 (0.268) Elapsed 4m 31s (remain 2m 53s) Loss: 0.2492(0.3349) Grad: 8.5157 LR: 0.000066 Epoch: [5][414/650] Data 0.272 (0.268) Elapsed 4m 44s (remain 2m 40s) Loss: 0.2921(0.3327) Grad: 7.1778 LR: 0.000066 Epoch: [5][432/650] Data 0.272 (0.268) Elapsed 4m 56s (remain 2m 28s) Loss: 0.6185(0.3317) Grad: 19.2257 LR: 0.000066 Epoch: [5][450/650] Data 0.272 (0.267) Elapsed 5m 8s (remain 2m 16s) Loss: 0.2464(0.3328) Grad: 7.0113 LR: 0.000066 Epoch: [5][468/650] Data 0.262 (0.267) Elapsed 5m 21s (remain 2m 3s) Loss: 0.1170(0.3356) Grad: 5.1384 LR: 0.000066 Epoch: [5][486/650] Data 0.271 (0.267) Elapsed 5m 33s (remain 1m 51s) Loss: 0.2204(0.3359) Grad: 6.6311 LR: 0.000066 Epoch: [5][504/650] Data 0.261 (0.267) Elapsed 5m 45s (remain 1m 39s) Loss: 0.1590(0.3356) Grad: 4.3151 LR: 0.000066 Epoch: [5][522/650] Data 0.272 (0.267) Elapsed 5m 58s (remain 1m 26s) Loss: 0.0500(0.3347) Grad: 2.0946 LR: 0.000066 Epoch: [5][540/650] Data 0.271 (0.267) Elapsed 6m 10s (remain 1m 14s) Loss: 0.6337(0.3337) Grad: 16.5904 LR: 0.000066 Epoch: [5][558/650] Data 0.264 (0.267) Elapsed 6m 22s (remain 1m 2s) Loss: 0.1482(0.3346) Grad: 4.7727 LR: 0.000066 Epoch: [5][576/650] Data 0.265 (0.267) Elapsed 6m 35s (remain 0m 49s) Loss: 0.2739(0.3331) Grad: 10.2463 LR: 0.000066 Epoch: [5][594/650] Data 0.240 (0.267) Elapsed 6m 
47s (remain 0m 37s) Loss: 0.3891(0.3348) Grad: 9.1534 LR: 0.000066 Epoch: [5][612/650] Data 0.262 (0.267) Elapsed 6m 59s (remain 0m 25s) Loss: 0.5128(0.3350) Grad: 14.7601 LR: 0.000066 Epoch: [5][630/650] Data 0.272 (0.267) Elapsed 7m 11s (remain 0m 13s) Loss: 1.2112(0.3351) Grad: 18.7087 LR: 0.000066 Epoch: [5][648/650] Data 0.273 (0.267) Elapsed 7m 24s (remain 0m 0s) Loss: 0.4695(0.3367) Grad: 12.8143 LR: 0.000066 Epoch: [5][649/650] Data 0.274 (0.267) Elapsed 7m 24s (remain 0m 0s) Loss: 0.4268(0.3368) Grad: 11.7171 LR: 0.000066 EVAL: [0/3883] Data 0.602 (0.602) Elapsed 0m 0s (remain 50m 18s) Loss: 0.0588(0.0588) EVAL: [18/3883] Data 0.001 (0.033) Elapsed 0m 3s (remain 13m 19s) Loss: 0.0497(0.0475) EVAL: [36/3883] Data 0.001 (0.017) Elapsed 0m 7s (remain 12m 16s) Loss: 0.0143(0.0542) EVAL: [54/3883] Data 0.001 (0.012) Elapsed 0m 10s (remain 11m 52s) Loss: 0.0033(0.0541) EVAL: [72/3883] Data 0.001 (0.009) Elapsed 0m 13s (remain 11m 38s) Loss: 0.1774(0.0628) EVAL: [90/3883] Data 0.001 (0.008) Elapsed 0m 16s (remain 11m 29s) Loss: 0.1237(0.0705) EVAL: [108/3883] Data 0.001 (0.006) Elapsed 0m 19s (remain 11m 21s) Loss: 0.1626(0.0725) EVAL: [126/3883] Data 0.001 (0.006) Elapsed 0m 22s (remain 11m 15s) Loss: 0.2051(0.0760) EVAL: [144/3883] Data 0.001 (0.005) Elapsed 0m 25s (remain 11m 10s) Loss: 0.0994(0.0737) EVAL: [162/3883] Data 0.001 (0.005) Elapsed 0m 29s (remain 11m 5s) Loss: 0.0122(0.0736) EVAL: [180/3883] Data 0.001 (0.004) Elapsed 0m 32s (remain 11m 0s) Loss: 0.0023(0.0717) EVAL: [198/3883] Data 0.001 (0.004) Elapsed 0m 35s (remain 10m 56s) Loss: 0.0158(0.0752) EVAL: [216/3883] Data 0.001 (0.004) Elapsed 0m 38s (remain 10m 51s) Loss: 0.0315(0.0756) EVAL: [234/3883] Data 0.001 (0.003) Elapsed 0m 41s (remain 10m 48s) Loss: 0.0976(0.0745) EVAL: [252/3883] Data 0.001 (0.003) Elapsed 0m 44s (remain 10m 44s) Loss: 0.0433(0.0721) EVAL: [270/3883] Data 0.001 (0.003) Elapsed 0m 48s (remain 10m 40s) Loss: 0.0192(0.0744) EVAL: [288/3883] Data 0.001 (0.003) Elapsed 0m 51s 
(remain 10m 36s) Loss: 0.1576(0.0765) EVAL: [306/3883] Data 0.001 (0.003) Elapsed 0m 54s (remain 10m 33s) Loss: 0.0380(0.0764) EVAL: [324/3883] Data 0.001 (0.003) Elapsed 0m 57s (remain 10m 29s) Loss: 0.0263(0.0758) EVAL: [342/3883] Data 0.001 (0.003) Elapsed 1m 0s (remain 10m 25s) Loss: 0.0043(0.0772) EVAL: [360/3883] Data 0.001 (0.003) Elapsed 1m 3s (remain 10m 22s) Loss: 0.0253(0.0770) EVAL: [378/3883] Data 0.001 (0.003) Elapsed 1m 6s (remain 10m 18s) Loss: 0.0295(0.0768) EVAL: [396/3883] Data 0.001 (0.002) Elapsed 1m 10s (remain 10m 15s) Loss: 0.1876(0.0765) EVAL: [414/3883] Data 0.001 (0.002) Elapsed 1m 13s (remain 10m 12s) Loss: 0.1282(0.0775) EVAL: [432/3883] Data 0.001 (0.002) Elapsed 1m 16s (remain 10m 8s) Loss: 0.0238(0.0775) EVAL: [450/3883] Data 0.001 (0.002) Elapsed 1m 19s (remain 10m 5s) Loss: 0.1296(0.0780) EVAL: [468/3883] Data 0.001 (0.002) Elapsed 1m 22s (remain 10m 1s) Loss: 0.0123(0.0785) EVAL: [486/3883] Data 0.001 (0.002) Elapsed 1m 25s (remain 9m 58s) Loss: 0.1640(0.0779) EVAL: [504/3883] Data 0.001 (0.002) Elapsed 1m 28s (remain 9m 55s) Loss: 0.0436(0.0780) EVAL: [522/3883] Data 0.001 (0.002) Elapsed 1m 32s (remain 9m 51s) Loss: 0.1649(0.0777) EVAL: [540/3883] Data 0.001 (0.002) Elapsed 1m 35s (remain 9m 48s) Loss: 0.0437(0.0768) EVAL: [558/3883] Data 0.001 (0.002) Elapsed 1m 38s (remain 9m 45s) Loss: 0.2094(0.0774) EVAL: [576/3883] Data 0.001 (0.002) Elapsed 1m 41s (remain 9m 42s) Loss: 0.0059(0.0768) EVAL: [594/3883] Data 0.001 (0.002) Elapsed 1m 44s (remain 9m 38s) Loss: 0.0822(0.0762) EVAL: [612/3883] Data 0.001 (0.002) Elapsed 1m 47s (remain 9m 35s) Loss: 0.1109(0.0758) EVAL: [630/3883] Data 0.001 (0.002) Elapsed 1m 51s (remain 9m 32s) Loss: 0.0252(0.0758) EVAL: [648/3883] Data 0.001 (0.002) Elapsed 1m 54s (remain 9m 29s) Loss: 0.0087(0.0759) EVAL: [666/3883] Data 0.001 (0.002) Elapsed 1m 57s (remain 9m 25s) Loss: 0.1095(0.0758) EVAL: [684/3883] Data 0.001 (0.002) Elapsed 2m 0s (remain 9m 22s) Loss: 0.0964(0.0758) EVAL: [702/3883] Data 
0.001 (0.002) Elapsed 2m 3s (remain 9m 19s) Loss: 0.0443(0.0749) EVAL: [720/3883] Data 0.001 (0.002) Elapsed 2m 6s (remain 9m 16s) Loss: 0.0774(0.0745) EVAL: [738/3883] Data 0.001 (0.002) Elapsed 2m 9s (remain 9m 12s) Loss: 0.0098(0.0746) EVAL: [756/3883] Data 0.001 (0.002) Elapsed 2m 13s (remain 9m 9s) Loss: 0.0656(0.0746) EVAL: [774/3883] Data 0.001 (0.002) Elapsed 2m 16s (remain 9m 6s) Loss: 0.0344(0.0743) EVAL: [792/3883] Data 0.001 (0.002) Elapsed 2m 19s (remain 9m 3s) Loss: 0.0835(0.0748) EVAL: [810/3883] Data 0.001 (0.002) Elapsed 2m 22s (remain 8m 59s) Loss: 0.0079(0.0747) EVAL: [828/3883] Data 0.001 (0.002) Elapsed 2m 25s (remain 8m 56s) Loss: 0.0116(0.0751) EVAL: [846/3883] Data 0.001 (0.002) Elapsed 2m 28s (remain 8m 53s) Loss: 0.0134(0.0753) EVAL: [864/3883] Data 0.001 (0.002) Elapsed 2m 32s (remain 8m 50s) Loss: 0.1624(0.0759) EVAL: [882/3883] Data 0.001 (0.002) Elapsed 2m 35s (remain 8m 47s) Loss: 0.0029(0.0759) EVAL: [900/3883] Data 0.001 (0.002) Elapsed 2m 38s (remain 8m 43s) Loss: 0.1615(0.0759) EVAL: [918/3883] Data 0.001 (0.002) Elapsed 2m 41s (remain 8m 40s) Loss: 0.0804(0.0761) EVAL: [936/3883] Data 0.001 (0.002) Elapsed 2m 44s (remain 8m 37s) Loss: 0.0738(0.0760) EVAL: [954/3883] Data 0.001 (0.002) Elapsed 2m 47s (remain 8m 34s) Loss: 0.0498(0.0756) EVAL: [972/3883] Data 0.001 (0.002) Elapsed 2m 50s (remain 8m 31s) Loss: 0.0209(0.0760) EVAL: [990/3883] Data 0.001 (0.002) Elapsed 2m 54s (remain 8m 27s) Loss: 0.0594(0.0756) EVAL: [1008/3883] Data 0.001 (0.002) Elapsed 2m 57s (remain 8m 24s) Loss: 0.1562(0.0758) EVAL: [1026/3883] Data 0.001 (0.002) Elapsed 3m 0s (remain 8m 21s) Loss: 0.0105(0.0754) EVAL: [1044/3883] Data 0.002 (0.001) Elapsed 3m 3s (remain 8m 18s) Loss: 0.0110(0.0755) EVAL: [1062/3883] Data 0.001 (0.001) Elapsed 3m 6s (remain 8m 15s) Loss: 0.0657(0.0754) EVAL: [1080/3883] Data 0.001 (0.001) Elapsed 3m 9s (remain 8m 11s) Loss: 0.0595(0.0752) EVAL: [1098/3883] Data 0.001 (0.001) Elapsed 3m 12s (remain 8m 8s) Loss: 0.0641(0.0749) 
EVAL: [1116/3883] Data 0.001 (0.001) Elapsed 3m 16s (remain 8m 5s) Loss: 0.0719(0.0748) EVAL: [1134/3883] Data 0.001 (0.001) Elapsed 3m 19s (remain 8m 2s) Loss: 0.0091(0.0746) EVAL: [1152/3883] Data 0.001 (0.001) Elapsed 3m 22s (remain 7m 59s) Loss: 0.0887(0.0745) EVAL: [1170/3883] Data 0.001 (0.001) Elapsed 3m 25s (remain 7m 56s) Loss: 0.0637(0.0742) EVAL: [1188/3883] Data 0.001 (0.001) Elapsed 3m 28s (remain 7m 52s) Loss: 0.0417(0.0742) EVAL: [1206/3883] Data 0.001 (0.001) Elapsed 3m 31s (remain 7m 49s) Loss: 0.0163(0.0741) EVAL: [1224/3883] Data 0.001 (0.001) Elapsed 3m 35s (remain 7m 46s) Loss: 0.1403(0.0742) EVAL: [1242/3883] Data 0.002 (0.001) Elapsed 3m 38s (remain 7m 43s) Loss: 0.0905(0.0742) EVAL: [1260/3883] Data 0.001 (0.001) Elapsed 3m 41s (remain 7m 40s) Loss: 0.3220(0.0745) EVAL: [1278/3883] Data 0.001 (0.001) Elapsed 3m 44s (remain 7m 37s) Loss: 0.1530(0.0749) EVAL: [1296/3883] Data 0.001 (0.001) Elapsed 3m 47s (remain 7m 33s) Loss: 0.0106(0.0750) EVAL: [1314/3883] Data 0.001 (0.001) Elapsed 3m 50s (remain 7m 30s) Loss: 0.0334(0.0752) EVAL: [1332/3883] Data 0.001 (0.001) Elapsed 3m 53s (remain 7m 27s) Loss: 0.0804(0.0752) EVAL: [1350/3883] Data 0.002 (0.001) Elapsed 3m 57s (remain 7m 24s) Loss: 0.0020(0.0752) EVAL: [1368/3883] Data 0.001 (0.001) Elapsed 4m 0s (remain 7m 21s) Loss: 0.2127(0.0756) EVAL: [1386/3883] Data 0.001 (0.001) Elapsed 4m 3s (remain 7m 17s) Loss: 0.0189(0.0754) EVAL: [1404/3883] Data 0.001 (0.001) Elapsed 4m 6s (remain 7m 14s) Loss: 0.0509(0.0755) EVAL: [1422/3883] Data 0.001 (0.001) Elapsed 4m 9s (remain 7m 11s) Loss: 0.0660(0.0754) EVAL: [1440/3883] Data 0.001 (0.001) Elapsed 4m 12s (remain 7m 8s) Loss: 0.0574(0.0751) EVAL: [1458/3883] Data 0.001 (0.001) Elapsed 4m 15s (remain 7m 5s) Loss: 0.0215(0.0752) EVAL: [1476/3883] Data 0.001 (0.001) Elapsed 4m 19s (remain 7m 2s) Loss: 0.0470(0.0750) EVAL: [1494/3883] Data 0.001 (0.001) Elapsed 4m 22s (remain 6m 58s) Loss: 0.1523(0.0749) EVAL: [1512/3883] Data 0.001 (0.001) Elapsed 4m 
25s (remain 6m 55s) Loss: 0.2545(0.0749) EVAL: [1530/3883] Data 0.001 (0.001) Elapsed 4m 28s (remain 6m 52s) Loss: 0.1618(0.0749) EVAL: [1548/3883] Data 0.001 (0.001) Elapsed 4m 31s (remain 6m 49s) Loss: 0.0751(0.0748) EVAL: [1566/3883] Data 0.001 (0.001) Elapsed 4m 34s (remain 6m 46s) Loss: 0.1615(0.0745) EVAL: [1584/3883] Data 0.001 (0.001) Elapsed 4m 38s (remain 6m 43s) Loss: 0.0056(0.0746) EVAL: [1602/3883] Data 0.001 (0.001) Elapsed 4m 41s (remain 6m 39s) Loss: 0.1515(0.0748) EVAL: [1620/3883] Data 0.001 (0.001) Elapsed 4m 44s (remain 6m 36s) Loss: 0.1545(0.0752) EVAL: [1638/3883] Data 0.001 (0.001) Elapsed 4m 47s (remain 6m 33s) Loss: 0.1542(0.0755) EVAL: [1656/3883] Data 0.001 (0.001) Elapsed 4m 50s (remain 6m 30s) Loss: 0.0752(0.0755) EVAL: [1674/3883] Data 0.001 (0.001) Elapsed 4m 53s (remain 6m 27s) Loss: 0.1375(0.0756) EVAL: [1692/3883] Data 0.001 (0.001) Elapsed 4m 56s (remain 6m 24s) Loss: 0.0117(0.0755) EVAL: [1710/3883] Data 0.001 (0.001) Elapsed 5m 0s (remain 6m 20s) Loss: 0.0410(0.0757) EVAL: [1728/3883] Data 0.001 (0.001) Elapsed 5m 3s (remain 6m 17s) Loss: 0.0228(0.0757) EVAL: [1746/3883] Data 0.001 (0.001) Elapsed 5m 6s (remain 6m 14s) Loss: 0.0820(0.0757) EVAL: [1764/3883] Data 0.001 (0.001) Elapsed 5m 9s (remain 6m 11s) Loss: 0.0582(0.0758) EVAL: [1782/3883] Data 0.001 (0.001) Elapsed 5m 12s (remain 6m 8s) Loss: 0.2917(0.0759) EVAL: [1800/3883] Data 0.001 (0.001) Elapsed 5m 15s (remain 6m 5s) Loss: 0.1685(0.0760) EVAL: [1818/3883] Data 0.001 (0.001) Elapsed 5m 18s (remain 6m 1s) Loss: 0.0205(0.0759) EVAL: [1836/3883] Data 0.001 (0.001) Elapsed 5m 22s (remain 5m 58s) Loss: 0.0303(0.0761) EVAL: [1854/3883] Data 0.001 (0.001) Elapsed 5m 25s (remain 5m 55s) Loss: 0.0013(0.0761) EVAL: [1872/3883] Data 0.001 (0.001) Elapsed 5m 28s (remain 5m 52s) Loss: 0.1502(0.0762) EVAL: [1890/3883] Data 0.001 (0.001) Elapsed 5m 31s (remain 5m 49s) Loss: 0.0159(0.0762) EVAL: [1908/3883] Data 0.001 (0.001) Elapsed 5m 34s (remain 5m 46s) Loss: 0.0720(0.0763) EVAL: 
[1926/3883] Data 0.001 (0.001) Elapsed 5m 37s (remain 5m 42s) Loss: 0.0019(0.0762) EVAL: [1944/3883] Data 0.001 (0.001) Elapsed 5m 41s (remain 5m 39s) Loss: 0.0068(0.0764) EVAL: [1962/3883] Data 0.001 (0.001) Elapsed 5m 44s (remain 5m 36s) Loss: 0.0766(0.0766) EVAL: [1980/3883] Data 0.001 (0.001) Elapsed 5m 47s (remain 5m 33s) Loss: 0.1369(0.0767) EVAL: [1998/3883] Data 0.001 (0.001) Elapsed 5m 50s (remain 5m 30s) Loss: 0.1648(0.0767) EVAL: [2016/3883] Data 0.001 (0.001) Elapsed 5m 53s (remain 5m 27s) Loss: 0.0100(0.0770) EVAL: [2034/3883] Data 0.001 (0.001) Elapsed 5m 56s (remain 5m 24s) Loss: 0.0161(0.0770) EVAL: [2052/3883] Data 0.001 (0.001) Elapsed 5m 59s (remain 5m 20s) Loss: 0.1857(0.0769) EVAL: [2070/3883] Data 0.001 (0.001) Elapsed 6m 3s (remain 5m 17s) Loss: 0.1530(0.0770) EVAL: [2088/3883] Data 0.001 (0.001) Elapsed 6m 6s (remain 5m 14s) Loss: 0.0099(0.0769) EVAL: [2106/3883] Data 0.001 (0.001) Elapsed 6m 9s (remain 5m 11s) Loss: 0.0753(0.0769) EVAL: [2124/3883] Data 0.002 (0.001) Elapsed 6m 12s (remain 5m 8s) Loss: 0.0215(0.0766) EVAL: [2142/3883] Data 0.001 (0.001) Elapsed 6m 15s (remain 5m 5s) Loss: 0.0440(0.0765) EVAL: [2160/3883] Data 0.001 (0.001) Elapsed 6m 18s (remain 5m 1s) Loss: 0.0560(0.0763) EVAL: [2178/3883] Data 0.001 (0.001) Elapsed 6m 22s (remain 4m 58s) Loss: 0.0082(0.0761) EVAL: [2196/3883] Data 0.001 (0.001) Elapsed 6m 25s (remain 4m 55s) Loss: 0.0441(0.0761) EVAL: [2214/3883] Data 0.001 (0.001) Elapsed 6m 28s (remain 4m 52s) Loss: 0.0658(0.0760) EVAL: [2232/3883] Data 0.001 (0.001) Elapsed 6m 31s (remain 4m 49s) Loss: 0.1798(0.0761) EVAL: [2250/3883] Data 0.001 (0.001) Elapsed 6m 34s (remain 4m 46s) Loss: 0.4081(0.0763) EVAL: [2268/3883] Data 0.001 (0.001) Elapsed 6m 37s (remain 4m 42s) Loss: 0.0296(0.0764) EVAL: [2286/3883] Data 0.001 (0.001) Elapsed 6m 40s (remain 4m 39s) Loss: 0.1495(0.0764) EVAL: [2304/3883] Data 0.001 (0.001) Elapsed 6m 44s (remain 4m 36s) Loss: 0.0978(0.0762) EVAL: [2322/3883] Data 0.001 (0.001) Elapsed 6m 47s 
(remain 4m 33s) Loss: 0.0129(0.0762) EVAL: [2340/3883] Data 0.001 (0.001) Elapsed 6m 50s (remain 4m 30s) Loss: 0.0920(0.0762) EVAL: [2358/3883] Data 0.001 (0.001) Elapsed 6m 53s (remain 4m 27s) Loss: 0.1591(0.0761) EVAL: [2376/3883] Data 0.001 (0.001) Elapsed 6m 56s (remain 4m 23s) Loss: 0.1886(0.0763) EVAL: [2394/3883] Data 0.001 (0.001) Elapsed 6m 59s (remain 4m 20s) Loss: 0.2469(0.0763) EVAL: [2412/3883] Data 0.001 (0.001) Elapsed 7m 2s (remain 4m 17s) Loss: 0.1245(0.0762) EVAL: [2430/3883] Data 0.001 (0.001) Elapsed 7m 6s (remain 4m 14s) Loss: 0.0063(0.0761) EVAL: [2448/3883] Data 0.001 (0.001) Elapsed 7m 9s (remain 4m 11s) Loss: 0.0536(0.0759) EVAL: [2466/3883] Data 0.001 (0.001) Elapsed 7m 12s (remain 4m 8s) Loss: 0.0411(0.0758) EVAL: [2484/3883] Data 0.001 (0.001) Elapsed 7m 15s (remain 4m 5s) Loss: 0.1582(0.0760) EVAL: [2502/3883] Data 0.001 (0.001) Elapsed 7m 18s (remain 4m 1s) Loss: 0.0383(0.0762) EVAL: [2520/3883] Data 0.001 (0.001) Elapsed 7m 21s (remain 3m 58s) Loss: 0.0345(0.0761) EVAL: [2538/3883] Data 0.001 (0.001) Elapsed 7m 24s (remain 3m 55s) Loss: 0.0700(0.0763) EVAL: [2556/3883] Data 0.001 (0.001) Elapsed 7m 28s (remain 3m 52s) Loss: 0.3643(0.0763) EVAL: [2574/3883] Data 0.001 (0.001) Elapsed 7m 31s (remain 3m 49s) Loss: 0.4259(0.0764) EVAL: [2592/3883] Data 0.001 (0.001) Elapsed 7m 34s (remain 3m 46s) Loss: 0.0195(0.0765) EVAL: [2610/3883] Data 0.001 (0.001) Elapsed 7m 37s (remain 3m 42s) Loss: 0.0881(0.0766) EVAL: [2628/3883] Data 0.001 (0.001) Elapsed 7m 40s (remain 3m 39s) Loss: 0.0137(0.0767) EVAL: [2646/3883] Data 0.001 (0.001) Elapsed 7m 43s (remain 3m 36s) Loss: 0.0793(0.0766) EVAL: [2664/3883] Data 0.001 (0.001) Elapsed 7m 47s (remain 3m 33s) Loss: 0.0307(0.0766) EVAL: [2682/3883] Data 0.002 (0.001) Elapsed 7m 50s (remain 3m 30s) Loss: 0.0757(0.0765) EVAL: [2700/3883] Data 0.001 (0.001) Elapsed 7m 53s (remain 3m 27s) Loss: 0.0265(0.0765) EVAL: [2718/3883] Data 0.001 (0.001) Elapsed 7m 56s (remain 3m 23s) Loss: 0.1416(0.0765) EVAL: 
[2736/3883] Data 0.001 (0.001) Elapsed 7m 59s (remain 3m 20s) Loss: 0.0171(0.0766) EVAL: [2754/3883] Data 0.001 (0.001) Elapsed 8m 2s (remain 3m 17s) Loss: 0.0432(0.0770) EVAL: [2772/3883] Data 0.001 (0.001) Elapsed 8m 5s (remain 3m 14s) Loss: 0.0990(0.0769) EVAL: [2790/3883] Data 0.001 (0.001) Elapsed 8m 9s (remain 3m 11s) Loss: 0.0068(0.0769) EVAL: [2808/3883] Data 0.001 (0.001) Elapsed 8m 12s (remain 3m 8s) Loss: 0.1253(0.0769) EVAL: [2826/3883] Data 0.001 (0.001) Elapsed 8m 15s (remain 3m 5s) Loss: 0.0576(0.0768) EVAL: [2844/3883] Data 0.001 (0.001) Elapsed 8m 18s (remain 3m 1s) Loss: 0.0729(0.0768) EVAL: [2862/3883] Data 0.001 (0.001) Elapsed 8m 21s (remain 2m 58s) Loss: 0.1980(0.0768) EVAL: [2880/3883] Data 0.001 (0.001) Elapsed 8m 24s (remain 2m 55s) Loss: 0.0140(0.0767) EVAL: [2898/3883] Data 0.001 (0.001) Elapsed 8m 27s (remain 2m 52s) Loss: 0.2125(0.0767) EVAL: [2916/3883] Data 0.001 (0.001) Elapsed 8m 31s (remain 2m 49s) Loss: 0.0896(0.0766) EVAL: [2934/3883] Data 0.001 (0.001) Elapsed 8m 34s (remain 2m 46s) Loss: 0.1641(0.0766) EVAL: [2952/3883] Data 0.001 (0.001) Elapsed 8m 37s (remain 2m 42s) Loss: 0.0048(0.0765) EVAL: [2970/3883] Data 0.001 (0.001) Elapsed 8m 40s (remain 2m 39s) Loss: 0.0215(0.0765) EVAL: [2988/3883] Data 0.001 (0.001) Elapsed 8m 43s (remain 2m 36s) Loss: 0.0359(0.0766) EVAL: [3006/3883] Data 0.001 (0.001) Elapsed 8m 46s (remain 2m 33s) Loss: 0.0501(0.0765) EVAL: [3024/3883] Data 0.002 (0.001) Elapsed 8m 50s (remain 2m 30s) Loss: 0.1773(0.0765) EVAL: [3042/3883] Data 0.001 (0.001) Elapsed 8m 53s (remain 2m 27s) Loss: 0.0305(0.0763) EVAL: [3060/3883] Data 0.001 (0.001) Elapsed 8m 56s (remain 2m 24s) Loss: 0.0624(0.0764) EVAL: [3078/3883] Data 0.001 (0.001) Elapsed 8m 59s (remain 2m 20s) Loss: 0.1505(0.0763) EVAL: [3096/3883] Data 0.001 (0.001) Elapsed 9m 2s (remain 2m 17s) Loss: 0.3916(0.0764) EVAL: [3114/3883] Data 0.001 (0.001) Elapsed 9m 5s (remain 2m 14s) Loss: 0.0713(0.0764) EVAL: [3132/3883] Data 0.001 (0.001) Elapsed 9m 8s 
(remain 2m 11s) Loss: 0.0710(0.0763) EVAL: [3150/3883] Data 0.001 (0.001) Elapsed 9m 12s (remain 2m 8s) Loss: 0.0602(0.0762) EVAL: [3168/3883] Data 0.001 (0.001) Elapsed 9m 15s (remain 2m 5s) Loss: 0.1611(0.0761) EVAL: [3186/3883] Data 0.001 (0.001) Elapsed 9m 18s (remain 2m 1s) Loss: 0.0043(0.0760) EVAL: [3204/3883] Data 0.001 (0.001) Elapsed 9m 21s (remain 1m 58s) Loss: 0.0123(0.0759) EVAL: [3222/3883] Data 0.001 (0.001) Elapsed 9m 24s (remain 1m 55s) Loss: 0.1212(0.0759) EVAL: [3240/3883] Data 0.001 (0.001) Elapsed 9m 27s (remain 1m 52s) Loss: 0.3712(0.0760) EVAL: [3258/3883] Data 0.001 (0.001) Elapsed 9m 30s (remain 1m 49s) Loss: 0.0189(0.0761) EVAL: [3276/3883] Data 0.001 (0.001) Elapsed 9m 34s (remain 1m 46s) Loss: 0.0440(0.0760) EVAL: [3294/3883] Data 0.001 (0.001) Elapsed 9m 37s (remain 1m 43s) Loss: 0.1172(0.0761) EVAL: [3312/3883] Data 0.001 (0.001) Elapsed 9m 40s (remain 1m 39s) Loss: 0.0023(0.0761) EVAL: [3330/3883] Data 0.001 (0.001) Elapsed 9m 43s (remain 1m 36s) Loss: 0.1429(0.0760) EVAL: [3348/3883] Data 0.001 (0.001) Elapsed 9m 46s (remain 1m 33s) Loss: 0.0461(0.0760) EVAL: [3366/3883] Data 0.001 (0.001) Elapsed 9m 49s (remain 1m 30s) Loss: 0.0097(0.0760) EVAL: [3384/3883] Data 0.003 (0.001) Elapsed 9m 53s (remain 1m 27s) Loss: 0.1002(0.0760) EVAL: [3402/3883] Data 0.001 (0.001) Elapsed 9m 56s (remain 1m 24s) Loss: 0.1016(0.0761) EVAL: [3420/3883] Data 0.001 (0.001) Elapsed 9m 59s (remain 1m 20s) Loss: 0.1569(0.0760) EVAL: [3438/3883] Data 0.001 (0.001) Elapsed 10m 2s (remain 1m 17s) Loss: 0.0987(0.0761) EVAL: [3456/3883] Data 0.001 (0.001) Elapsed 10m 5s (remain 1m 14s) Loss: 0.1878(0.0762) EVAL: [3474/3883] Data 0.001 (0.001) Elapsed 10m 8s (remain 1m 11s) Loss: 0.0157(0.0762) EVAL: [3492/3883] Data 0.001 (0.001) Elapsed 10m 11s (remain 1m 8s) Loss: 0.0398(0.0763) EVAL: [3510/3883] Data 0.001 (0.001) Elapsed 10m 15s (remain 1m 5s) Loss: 0.0289(0.0762) EVAL: [3528/3883] Data 0.001 (0.001) Elapsed 10m 18s (remain 1m 2s) Loss: 0.5603(0.0765) EVAL: 
[3546/3883] Data 0.001 (0.001) Elapsed 10m 21s (remain 0m 58s) Loss: 0.1925(0.0765) EVAL: [3564/3883] Data 0.001 (0.001) Elapsed 10m 24s (remain 0m 55s) Loss: 0.0781(0.0763) EVAL: [3582/3883] Data 0.001 (0.001) Elapsed 10m 27s (remain 0m 52s) Loss: 0.0247(0.0764) EVAL: [3600/3883] Data 0.001 (0.001) Elapsed 10m 30s (remain 0m 49s) Loss: 0.0341(0.0764) EVAL: [3618/3883] Data 0.001 (0.001) Elapsed 10m 34s (remain 0m 46s) Loss: 0.1679(0.0764) EVAL: [3636/3883] Data 0.001 (0.001) Elapsed 10m 37s (remain 0m 43s) Loss: 0.0117(0.0765) EVAL: [3654/3883] Data 0.001 (0.001) Elapsed 10m 40s (remain 0m 39s) Loss: 0.0180(0.0764) EVAL: [3672/3883] Data 0.001 (0.001) Elapsed 10m 43s (remain 0m 36s) Loss: 0.3095(0.0765) EVAL: [3690/3883] Data 0.001 (0.001) Elapsed 10m 46s (remain 0m 33s) Loss: 0.0208(0.0766) EVAL: [3708/3883] Data 0.002 (0.001) Elapsed 10m 49s (remain 0m 30s) Loss: 0.0253(0.0767) EVAL: [3726/3883] Data 0.001 (0.001) Elapsed 10m 52s (remain 0m 27s) Loss: 0.0846(0.0766) EVAL: [3744/3883] Data 0.001 (0.001) Elapsed 10m 56s (remain 0m 24s) Loss: 0.0817(0.0767) EVAL: [3762/3883] Data 0.001 (0.001) Elapsed 10m 59s (remain 0m 21s) Loss: 0.0124(0.0766) EVAL: [3780/3883] Data 0.001 (0.001) Elapsed 11m 2s (remain 0m 17s) Loss: 0.0040(0.0766) EVAL: [3798/3883] Data 0.001 (0.001) Elapsed 11m 5s (remain 0m 14s) Loss: 0.0063(0.0766) EVAL: [3816/3883] Data 0.001 (0.001) Elapsed 11m 8s (remain 0m 11s) Loss: 0.0110(0.0767) EVAL: [3834/3883] Data 0.001 (0.001) Elapsed 11m 11s (remain 0m 8s) Loss: 0.0281(0.0767) EVAL: [3852/3883] Data 0.001 (0.001) Elapsed 11m 14s (remain 0m 5s) Loss: 0.1866(0.0768) EVAL: [3870/3883] Data 0.001 (0.001) Elapsed 11m 18s (remain 0m 2s) Loss: 0.0265(0.0766)
Epoch 5 - avg_train_loss: 0.3368 avg_val_loss: 0.0766 time: 1125s Epoch 5 - AUC: 0.9636182896178385 - pAUC: 0.17513211316620478 Epoch 5 - Save Best Score: 0.1751 Model
EVAL: [3882/3883] Data 0.001 (0.001) Elapsed 11m 20s (remain 0m 0s) Loss: 0.1657(0.0766) Epoch: [6][0/650] Data 0.863 (0.863) Elapsed 0m 1s (remain 14m 8s) Loss: 0.4135(0.4135) Grad: 10.3022 LR: 0.000051 Epoch: [6][18/650] Data 0.272 (0.295) Elapsed 0m 13s (remain 7m 31s) Loss: 0.2558(0.3027) Grad: 9.4665 LR: 0.000051 Epoch: [6][36/650] Data 0.272 (0.281) Elapsed 0m 25s (remain 7m 9s) Loss: 0.7502(0.3213) Grad: 19.7929 LR: 0.000051 Epoch: [6][54/650] Data 0.272 (0.276) Elapsed 0m 38s (remain 6m 53s) Loss: 0.2778(0.3280) Grad: 8.3278 LR: 0.000051 Epoch: [6][72/650] Data 0.270 (0.273) Elapsed 0m 50s (remain 6m 39s) Loss: 0.1938(0.3381) Grad: 5.6240 LR: 0.000051 Epoch: [6][90/650] Data 0.269 (0.272) Elapsed 1m 2s (remain 6m 25s) Loss: 0.4761(0.3438) Grad: 13.7877 LR: 0.000051 Epoch: [6][108/650] Data 0.272 (0.271) Elapsed 1m 15s (remain 6m 12s) Loss: 0.4925(0.3434) Grad: 13.0967 LR: 0.000051 Epoch: [6][126/650] Data 0.254 (0.271) Elapsed 1m 27s (remain 6m 0s) Loss: 0.2403(0.3350) Grad: 7.3619 LR: 0.000051 Epoch: [6][144/650] Data 0.271 (0.270) Elapsed 1m 39s (remain 5m 47s) Loss: 0.4898(0.3346) Grad: 11.5122 LR: 0.000051 Epoch: [6][162/650] Data 0.272 (0.270) Elapsed 1m 52s (remain 5m 34s) Loss: 0.2717(0.3278) Grad: 5.9697 LR: 0.000051 Epoch: [6][180/650] Data 0.271 (0.270) Elapsed 2m 4s (remain 5m 22s) Loss: 0.2895(0.3302) Grad: 9.1390 LR: 0.000051 Epoch: [6][198/650] Data 0.263 (0.269) Elapsed 2m 16s (remain 5m 9s) Loss: 0.4375(0.3285) Grad: 13.7522 LR: 0.000051 Epoch: [6][216/650] Data 0.270 (0.269) Elapsed 2m 28s (remain 4m 57s) Loss: 0.3005(0.3269) Grad: 6.7133 LR: 0.000051 Epoch: [6][234/650] Data 0.266 (0.269) Elapsed 2m 41s (remain 4m 44s) Loss: 0.4027(0.3250) Grad: 8.7941 LR: 0.000051 Epoch: [6][252/650] Data 0.264 (0.268) Elapsed 2m 53s (remain 4m 32s) Loss: 0.4270(0.3273) Grad: 15.2972 LR: 0.000051 Epoch: [6][270/650] Data 0.269 (0.268) Elapsed 3m 5s (remain 4m 20s) Loss: 0.2048(0.3263) Grad: 5.9822 LR: 0.000051 Epoch: [6][288/650] Data 0.266 (0.268) 
Elapsed 3m 18s (remain 4m 7s) Loss: 0.3121(0.3239) Grad: 6.9544 LR: 0.000051 Epoch: [6][306/650] Data 0.266 (0.268) Elapsed 3m 30s (remain 3m 55s) Loss: 0.3492(0.3253) Grad: 12.4343 LR: 0.000051 Epoch: [6][324/650] Data 0.270 (0.268) Elapsed 3m 42s (remain 3m 42s) Loss: 0.2124(0.3242) Grad: 6.5450 LR: 0.000051 Epoch: [6][342/650] Data 0.271 (0.267) Elapsed 3m 55s (remain 3m 30s) Loss: 0.2346(0.3212) Grad: 7.9441 LR: 0.000051 Epoch: [6][360/650] Data 0.272 (0.267) Elapsed 4m 7s (remain 3m 18s) Loss: 0.1302(0.3203) Grad: 4.9421 LR: 0.000051 Epoch: [6][378/650] Data 0.271 (0.267) Elapsed 4m 19s (remain 3m 5s) Loss: 0.4421(0.3207) Grad: 11.5121 LR: 0.000051 Epoch: [6][396/650] Data 0.266 (0.267) Elapsed 4m 32s (remain 2m 53s) Loss: 0.1618(0.3203) Grad: 9.9625 LR: 0.000051 Epoch: [6][414/650] Data 0.271 (0.267) Elapsed 4m 44s (remain 2m 41s) Loss: 0.5075(0.3190) Grad: 13.9074 LR: 0.000051 Epoch: [6][432/650] Data 0.271 (0.267) Elapsed 4m 56s (remain 2m 28s) Loss: 0.3391(0.3179) Grad: 9.9177 LR: 0.000051 Epoch: [6][450/650] Data 0.271 (0.267) Elapsed 5m 9s (remain 2m 16s) Loss: 0.4648(0.3190) Grad: 10.5599 LR: 0.000051 Epoch: [6][468/650] Data 0.272 (0.267) Elapsed 5m 21s (remain 2m 4s) Loss: 0.3260(0.3201) Grad: 12.7766 LR: 0.000051 Epoch: [6][486/650] Data 0.270 (0.267) Elapsed 5m 33s (remain 1m 51s) Loss: 0.4032(0.3203) Grad: 8.6771 LR: 0.000051 Epoch: [6][504/650] Data 0.272 (0.267) Elapsed 5m 45s (remain 1m 39s) Loss: 0.1699(0.3190) Grad: 5.1662 LR: 0.000051 Epoch: [6][522/650] Data 0.247 (0.267) Elapsed 5m 58s (remain 1m 26s) Loss: 0.0791(0.3187) Grad: 2.8019 LR: 0.000051 Epoch: [6][540/650] Data 0.271 (0.267) Elapsed 6m 10s (remain 1m 14s) Loss: 0.3469(0.3183) Grad: 9.7233 LR: 0.000051 Epoch: [6][558/650] Data 0.260 (0.267) Elapsed 6m 22s (remain 1m 2s) Loss: 0.1958(0.3176) Grad: 5.4328 LR: 0.000051 Epoch: [6][576/650] Data 0.257 (0.267) Elapsed 6m 35s (remain 0m 49s) Loss: 0.5494(0.3177) Grad: 16.0662 LR: 0.000051 Epoch: [6][594/650] Data 0.271 (0.267) Elapsed 6m 
47s (remain 0m 37s) Loss: 0.3536(0.3175) Grad: 10.4143 LR: 0.000051 Epoch: [6][612/650] Data 0.272 (0.267) Elapsed 6m 59s (remain 0m 25s) Loss: 0.4899(0.3169) Grad: 13.6072 LR: 0.000051 Epoch: [6][630/650] Data 0.272 (0.267) Elapsed 7m 12s (remain 0m 13s) Loss: 0.5676(0.3152) Grad: 10.7350 LR: 0.000051 Epoch: [6][648/650] Data 0.272 (0.267) Elapsed 7m 24s (remain 0m 0s) Loss: 0.2386(0.3145) Grad: 8.8718 LR: 0.000051 Epoch: [6][649/650] Data 0.273 (0.267) Elapsed 7m 25s (remain 0m 0s) Loss: 0.2590(0.3144) Grad: 8.6117 LR: 0.000051 EVAL: [0/3883] Data 0.520 (0.520) Elapsed 0m 0s (remain 45m 10s) Loss: 0.1100(0.1100) EVAL: [18/3883] Data 0.001 (0.029) Elapsed 0m 3s (remain 13m 10s) Loss: 0.0982(0.0787) EVAL: [36/3883] Data 0.001 (0.016) Elapsed 0m 7s (remain 12m 11s) Loss: 0.0551(0.0910) EVAL: [54/3883] Data 0.001 (0.011) Elapsed 0m 10s (remain 11m 49s) Loss: 0.0035(0.0879) EVAL: [72/3883] Data 0.001 (0.008) Elapsed 0m 13s (remain 11m 36s) Loss: 0.2558(0.1049) EVAL: [90/3883] Data 0.001 (0.007) Elapsed 0m 16s (remain 11m 27s) Loss: 0.2138(0.1120) EVAL: [108/3883] Data 0.001 (0.006) Elapsed 0m 19s (remain 11m 20s) Loss: 0.1706(0.1132) EVAL: [126/3883] Data 0.001 (0.005) Elapsed 0m 22s (remain 11m 14s) Loss: 0.3124(0.1174) EVAL: [144/3883] Data 0.001 (0.005) Elapsed 0m 25s (remain 11m 8s) Loss: 0.1551(0.1148) EVAL: [162/3883] Data 0.001 (0.004) Elapsed 0m 29s (remain 11m 3s) Loss: 0.0169(0.1143) EVAL: [180/3883] Data 0.001 (0.004) Elapsed 0m 32s (remain 10m 59s) Loss: 0.0042(0.1108) EVAL: [198/3883] Data 0.001 (0.004) Elapsed 0m 35s (remain 10m 55s) Loss: 0.0417(0.1151) EVAL: [216/3883] Data 0.001 (0.003) Elapsed 0m 38s (remain 10m 51s) Loss: 0.0148(0.1158) EVAL: [234/3883] Data 0.001 (0.003) Elapsed 0m 41s (remain 10m 47s) Loss: 0.1236(0.1151) EVAL: [252/3883] Data 0.001 (0.003) Elapsed 0m 44s (remain 10m 43s) Loss: 0.0838(0.1124) EVAL: [270/3883] Data 0.001 (0.003) Elapsed 0m 48s (remain 10m 39s) Loss: 0.0477(0.1144) EVAL: [288/3883] Data 0.001 (0.003) Elapsed 0m 51s 
(remain 10m 36s) Loss: 0.3248(0.1168) EVAL: [306/3883] Data 0.001 (0.003) Elapsed 0m 54s (remain 10m 32s) Loss: 0.0614(0.1165) EVAL: [324/3883] Data 0.001 (0.003) Elapsed 0m 57s (remain 10m 28s) Loss: 0.0603(0.1168) EVAL: [342/3883] Data 0.001 (0.002) Elapsed 1m 0s (remain 10m 25s) Loss: 0.0034(0.1187) EVAL: [360/3883] Data 0.001 (0.002) Elapsed 1m 3s (remain 10m 21s) Loss: 0.0504(0.1188) EVAL: [378/3883] Data 0.001 (0.002) Elapsed 1m 6s (remain 10m 18s) Loss: 0.0836(0.1184) EVAL: [396/3883] Data 0.001 (0.002) Elapsed 1m 10s (remain 10m 15s) Loss: 0.3076(0.1182) EVAL: [414/3883] Data 0.001 (0.002) Elapsed 1m 13s (remain 10m 11s) Loss: 0.1857(0.1193) EVAL: [432/3883] Data 0.001 (0.002) Elapsed 1m 16s (remain 10m 8s) Loss: 0.0414(0.1195) EVAL: [450/3883] Data 0.001 (0.002) Elapsed 1m 19s (remain 10m 5s) Loss: 0.1544(0.1201) EVAL: [468/3883] Data 0.001 (0.002) Elapsed 1m 22s (remain 10m 1s) Loss: 0.0489(0.1216) EVAL: [486/3883] Data 0.001 (0.002) Elapsed 1m 25s (remain 9m 58s) Loss: 0.3141(0.1212) EVAL: [504/3883] Data 0.001 (0.002) Elapsed 1m 28s (remain 9m 55s) Loss: 0.0491(0.1216) EVAL: [522/3883] Data 0.001 (0.002) Elapsed 1m 32s (remain 9m 51s) Loss: 0.2091(0.1213) EVAL: [540/3883] Data 0.001 (0.002) Elapsed 1m 35s (remain 9m 48s) Loss: 0.0712(0.1201) EVAL: [558/3883] Data 0.001 (0.002) Elapsed 1m 38s (remain 9m 45s) Loss: 0.3125(0.1211) EVAL: [576/3883] Data 0.001 (0.002) Elapsed 1m 41s (remain 9m 41s) Loss: 0.0145(0.1200) EVAL: [594/3883] Data 0.001 (0.002) Elapsed 1m 44s (remain 9m 38s) Loss: 0.0973(0.1192) EVAL: [612/3883] Data 0.001 (0.002) Elapsed 1m 47s (remain 9m 35s) Loss: 0.0881(0.1188) EVAL: [630/3883] Data 0.001 (0.002) Elapsed 1m 51s (remain 9m 32s) Loss: 0.0547(0.1185) EVAL: [648/3883] Data 0.001 (0.002) Elapsed 1m 54s (remain 9m 28s) Loss: 0.0176(0.1182) EVAL: [666/3883] Data 0.001 (0.002) Elapsed 1m 57s (remain 9m 25s) Loss: 0.1475(0.1183) EVAL: [684/3883] Data 0.001 (0.002) Elapsed 2m 0s (remain 9m 22s) Loss: 0.1657(0.1187) EVAL: [702/3883] Data 
0.001 (0.002) Elapsed 2m 3s (remain 9m 19s) Loss: 0.1310(0.1173) EVAL: [720/3883] Data 0.001 (0.002) Elapsed 2m 6s (remain 9m 15s) Loss: 0.1102(0.1167) EVAL: [738/3883] Data 0.001 (0.002) Elapsed 2m 9s (remain 9m 12s) Loss: 0.0230(0.1169) EVAL: [756/3883] Data 0.001 (0.002) Elapsed 2m 13s (remain 9m 9s) Loss: 0.1053(0.1169) EVAL: [774/3883] Data 0.001 (0.002) Elapsed 2m 16s (remain 9m 6s) Loss: 0.0588(0.1165) EVAL: [792/3883] Data 0.001 (0.002) Elapsed 2m 19s (remain 9m 3s) Loss: 0.1306(0.1172) EVAL: [810/3883] Data 0.004 (0.002) Elapsed 2m 22s (remain 8m 59s) Loss: 0.0154(0.1176) EVAL: [828/3883] Data 0.001 (0.002) Elapsed 2m 25s (remain 8m 56s) Loss: 0.0188(0.1183) EVAL: [846/3883] Data 0.001 (0.002) Elapsed 2m 28s (remain 8m 53s) Loss: 0.0634(0.1184) EVAL: [864/3883] Data 0.001 (0.002) Elapsed 2m 31s (remain 8m 50s) Loss: 0.2903(0.1193) EVAL: [882/3883] Data 0.001 (0.002) Elapsed 2m 35s (remain 8m 47s) Loss: 0.0032(0.1192) EVAL: [900/3883] Data 0.001 (0.002) Elapsed 2m 38s (remain 8m 43s) Loss: 0.2467(0.1192) EVAL: [918/3883] Data 0.001 (0.002) Elapsed 2m 41s (remain 8m 40s) Loss: 0.1278(0.1195) EVAL: [936/3883] Data 0.001 (0.002) Elapsed 2m 44s (remain 8m 37s) Loss: 0.1745(0.1197) EVAL: [954/3883] Data 0.001 (0.001) Elapsed 2m 47s (remain 8m 34s) Loss: 0.0909(0.1194) EVAL: [972/3883] Data 0.001 (0.001) Elapsed 2m 50s (remain 8m 31s) Loss: 0.0275(0.1198) EVAL: [990/3883] Data 0.001 (0.001) Elapsed 2m 54s (remain 8m 27s) Loss: 0.0926(0.1193) EVAL: [1008/3883] Data 0.001 (0.001) Elapsed 2m 57s (remain 8m 24s) Loss: 0.3017(0.1201) EVAL: [1026/3883] Data 0.001 (0.001) Elapsed 3m 0s (remain 8m 21s) Loss: 0.0233(0.1195) EVAL: [1044/3883] Data 0.001 (0.001) Elapsed 3m 3s (remain 8m 18s) Loss: 0.0149(0.1195) EVAL: [1062/3883] Data 0.001 (0.001) Elapsed 3m 6s (remain 8m 15s) Loss: 0.0787(0.1194) EVAL: [1080/3883] Data 0.001 (0.001) Elapsed 3m 9s (remain 8m 11s) Loss: 0.0711(0.1192) EVAL: [1098/3883] Data 0.002 (0.001) Elapsed 3m 12s (remain 8m 8s) Loss: 0.0944(0.1187) 
EVAL: [1116/3883] Data 0.001 (0.001) Elapsed 3m 16s (remain 8m 5s) Loss: 0.0819(0.1186) EVAL: [1134/3883] Data 0.001 (0.001) Elapsed 3m 19s (remain 8m 2s) Loss: 0.0187(0.1183) EVAL: [1152/3883] Data 0.001 (0.001) Elapsed 3m 22s (remain 7m 59s) Loss: 0.1165(0.1180) EVAL: [1170/3883] Data 0.001 (0.001) Elapsed 3m 25s (remain 7m 56s) Loss: 0.1045(0.1174) EVAL: [1188/3883] Data 0.001 (0.001) Elapsed 3m 28s (remain 7m 52s) Loss: 0.0752(0.1174) EVAL: [1206/3883] Data 0.001 (0.001) Elapsed 3m 31s (remain 7m 49s) Loss: 0.0141(0.1174) EVAL: [1224/3883] Data 0.001 (0.001) Elapsed 3m 34s (remain 7m 46s) Loss: 0.1940(0.1175) EVAL: [1242/3883] Data 0.001 (0.001) Elapsed 3m 38s (remain 7m 43s) Loss: 0.1001(0.1176) EVAL: [1260/3883] Data 0.001 (0.001) Elapsed 3m 41s (remain 7m 40s) Loss: 0.4672(0.1179) EVAL: [1278/3883] Data 0.001 (0.001) Elapsed 3m 44s (remain 7m 36s) Loss: 0.2081(0.1183) EVAL: [1296/3883] Data 0.001 (0.001) Elapsed 3m 47s (remain 7m 33s) Loss: 0.0083(0.1185) EVAL: [1314/3883] Data 0.001 (0.001) Elapsed 3m 50s (remain 7m 30s) Loss: 0.0359(0.1189) EVAL: [1332/3883] Data 0.001 (0.001) Elapsed 3m 53s (remain 7m 27s) Loss: 0.1475(0.1186) EVAL: [1350/3883] Data 0.001 (0.001) Elapsed 3m 57s (remain 7m 24s) Loss: 0.0139(0.1187) EVAL: [1368/3883] Data 0.002 (0.001) Elapsed 4m 0s (remain 7m 21s) Loss: 0.2664(0.1194) EVAL: [1386/3883] Data 0.001 (0.001) Elapsed 4m 3s (remain 7m 17s) Loss: 0.0946(0.1192) EVAL: [1404/3883] Data 0.001 (0.001) Elapsed 4m 6s (remain 7m 14s) Loss: 0.0638(0.1193) EVAL: [1422/3883] Data 0.001 (0.001) Elapsed 4m 9s (remain 7m 11s) Loss: 0.1028(0.1191) EVAL: [1440/3883] Data 0.001 (0.001) Elapsed 4m 12s (remain 7m 8s) Loss: 0.1162(0.1187) EVAL: [1458/3883] Data 0.004 (0.001) Elapsed 4m 15s (remain 7m 5s) Loss: 0.0335(0.1188) EVAL: [1476/3883] Data 0.001 (0.001) Elapsed 4m 19s (remain 7m 2s) Loss: 0.0592(0.1185) EVAL: [1494/3883] Data 0.001 (0.001) Elapsed 4m 22s (remain 6m 58s) Loss: 0.2484(0.1185) EVAL: [1512/3883] Data 0.001 (0.001) Elapsed 4m 
25s (remain 6m 55s) Loss: 0.3675(0.1184) EVAL: [1530/3883] Data 0.001 (0.001) Elapsed 4m 28s (remain 6m 52s) Loss: 0.2425(0.1184) EVAL: [1548/3883] Data 0.001 (0.001) Elapsed 4m 31s (remain 6m 49s) Loss: 0.0898(0.1181) EVAL: [1566/3883] Data 0.001 (0.001) Elapsed 4m 34s (remain 6m 46s) Loss: 0.2298(0.1178) EVAL: [1584/3883] Data 0.001 (0.001) Elapsed 4m 38s (remain 6m 43s) Loss: 0.0047(0.1179) EVAL: [1602/3883] Data 0.001 (0.001) Elapsed 4m 41s (remain 6m 39s) Loss: 0.2268(0.1181) EVAL: [1620/3883] Data 0.001 (0.001) Elapsed 4m 44s (remain 6m 36s) Loss: 0.2021(0.1188) EVAL: [1638/3883] Data 0.003 (0.001) Elapsed 4m 47s (remain 6m 33s) Loss: 0.2108(0.1191) EVAL: [1656/3883] Data 0.001 (0.001) Elapsed 4m 50s (remain 6m 30s) Loss: 0.1192(0.1192) EVAL: [1674/3883] Data 0.001 (0.001) Elapsed 4m 53s (remain 6m 27s) Loss: 0.1663(0.1192) EVAL: [1692/3883] Data 0.001 (0.001) Elapsed 4m 56s (remain 6m 24s) Loss: 0.0333(0.1189) EVAL: [1710/3883] Data 0.001 (0.001) Elapsed 5m 0s (remain 6m 20s) Loss: 0.1189(0.1193) EVAL: [1728/3883] Data 0.001 (0.001) Elapsed 5m 3s (remain 6m 17s) Loss: 0.0423(0.1193) EVAL: [1746/3883] Data 0.001 (0.001) Elapsed 5m 6s (remain 6m 14s) Loss: 0.1374(0.1193) EVAL: [1764/3883] Data 0.001 (0.001) Elapsed 5m 9s (remain 6m 11s) Loss: 0.1158(0.1195) EVAL: [1782/3883] Data 0.001 (0.001) Elapsed 5m 12s (remain 6m 8s) Loss: 0.4451(0.1197) EVAL: [1800/3883] Data 0.001 (0.001) Elapsed 5m 15s (remain 6m 5s) Loss: 0.2391(0.1197) EVAL: [1818/3883] Data 0.001 (0.001) Elapsed 5m 18s (remain 6m 1s) Loss: 0.0170(0.1198) EVAL: [1836/3883] Data 0.001 (0.001) Elapsed 5m 22s (remain 5m 58s) Loss: 0.0289(0.1201) EVAL: [1854/3883] Data 0.001 (0.001) Elapsed 5m 25s (remain 5m 55s) Loss: 0.0006(0.1201) EVAL: [1872/3883] Data 0.001 (0.001) Elapsed 5m 28s (remain 5m 52s) Loss: 0.1896(0.1202) EVAL: [1890/3883] Data 0.001 (0.001) Elapsed 5m 31s (remain 5m 49s) Loss: 0.0209(0.1202) EVAL: [1908/3883] Data 0.001 (0.001) Elapsed 5m 34s (remain 5m 46s) Loss: 0.1260(0.1201) EVAL: 
[1926/3883] Data 0.001 (0.001) Elapsed 5m 37s (remain 5m 42s) Loss: 0.0178(0.1200) EVAL: [1944/3883] Data 0.002 (0.001) Elapsed 5m 41s (remain 5m 39s) Loss: 0.0069(0.1203) EVAL: [1962/3883] Data 0.001 (0.001) Elapsed 5m 44s (remain 5m 36s) Loss: 0.1123(0.1207) EVAL: [1980/3883] Data 0.001 (0.001) Elapsed 5m 47s (remain 5m 33s) Loss: 0.1865(0.1208) EVAL: [1998/3883] Data 0.001 (0.001) Elapsed 5m 50s (remain 5m 30s) Loss: 0.0814(0.1207) EVAL: [2016/3883] Data 0.001 (0.001) Elapsed 5m 53s (remain 5m 27s) Loss: 0.0117(0.1210) EVAL: [2034/3883] Data 0.001 (0.001) Elapsed 5m 56s (remain 5m 24s) Loss: 0.0185(0.1210) EVAL: [2052/3883] Data 0.001 (0.001) Elapsed 5m 59s (remain 5m 20s) Loss: 0.3959(0.1210) EVAL: [2070/3883] Data 0.001 (0.001) Elapsed 6m 3s (remain 5m 17s) Loss: 0.2530(0.1214) EVAL: [2088/3883] Data 0.001 (0.001) Elapsed 6m 6s (remain 5m 14s) Loss: 0.0139(0.1211) EVAL: [2106/3883] Data 0.001 (0.001) Elapsed 6m 9s (remain 5m 11s) Loss: 0.0936(0.1211) EVAL: [2124/3883] Data 0.001 (0.001) Elapsed 6m 12s (remain 5m 8s) Loss: 0.0238(0.1206) EVAL: [2142/3883] Data 0.001 (0.001) Elapsed 6m 15s (remain 5m 5s) Loss: 0.0595(0.1205) EVAL: [2160/3883] Data 0.001 (0.001) Elapsed 6m 18s (remain 5m 1s) Loss: 0.1373(0.1202) EVAL: [2178/3883] Data 0.001 (0.001) Elapsed 6m 21s (remain 4m 58s) Loss: 0.0026(0.1200) EVAL: [2196/3883] Data 0.001 (0.001) Elapsed 6m 25s (remain 4m 55s) Loss: 0.0650(0.1199) EVAL: [2214/3883] Data 0.001 (0.001) Elapsed 6m 28s (remain 4m 52s) Loss: 0.0695(0.1197) EVAL: [2232/3883] Data 0.001 (0.001) Elapsed 6m 31s (remain 4m 49s) Loss: 0.2816(0.1198) EVAL: [2250/3883] Data 0.001 (0.001) Elapsed 6m 34s (remain 4m 46s) Loss: 0.5368(0.1199) EVAL: [2268/3883] Data 0.001 (0.001) Elapsed 6m 37s (remain 4m 42s) Loss: 0.0355(0.1200) EVAL: [2286/3883] Data 0.001 (0.001) Elapsed 6m 40s (remain 4m 39s) Loss: 0.2814(0.1201) EVAL: [2304/3883] Data 0.001 (0.001) Elapsed 6m 44s (remain 4m 36s) Loss: 0.1359(0.1199) EVAL: [2322/3883] Data 0.001 (0.001) Elapsed 6m 47s 
(remain 4m 33s) Loss: 0.0408(0.1199) EVAL: [2340/3883] Data 0.001 (0.001) Elapsed 6m 50s (remain 4m 30s) Loss: 0.1064(0.1198) EVAL: [2358/3883] Data 0.001 (0.001) Elapsed 6m 53s (remain 4m 27s) Loss: 0.2808(0.1197) EVAL: [2376/3883] Data 0.001 (0.001) Elapsed 6m 56s (remain 4m 23s) Loss: 0.2605(0.1199) EVAL: [2394/3883] Data 0.001 (0.001) Elapsed 6m 59s (remain 4m 20s) Loss: 0.3779(0.1198) EVAL: [2412/3883] Data 0.001 (0.001) Elapsed 7m 2s (remain 4m 17s) Loss: 0.2102(0.1198) EVAL: [2430/3883] Data 0.001 (0.001) Elapsed 7m 6s (remain 4m 14s) Loss: 0.0151(0.1196) EVAL: [2448/3883] Data 0.001 (0.001) Elapsed 7m 9s (remain 4m 11s) Loss: 0.0848(0.1193) EVAL: [2466/3883] Data 0.001 (0.001) Elapsed 7m 12s (remain 4m 8s) Loss: 0.0747(0.1192) EVAL: [2484/3883] Data 0.001 (0.001) Elapsed 7m 15s (remain 4m 5s) Loss: 0.2582(0.1194) EVAL: [2502/3883] Data 0.001 (0.001) Elapsed 7m 18s (remain 4m 1s) Loss: 0.0445(0.1196) EVAL: [2520/3883] Data 0.001 (0.001) Elapsed 7m 21s (remain 3m 58s) Loss: 0.0325(0.1193) EVAL: [2538/3883] Data 0.001 (0.001) Elapsed 7m 25s (remain 3m 55s) Loss: 0.1030(0.1196) EVAL: [2556/3883] Data 0.001 (0.001) Elapsed 7m 28s (remain 3m 52s) Loss: 0.5155(0.1195) EVAL: [2574/3883] Data 0.001 (0.001) Elapsed 7m 31s (remain 3m 49s) Loss: 0.5506(0.1195) EVAL: [2592/3883] Data 0.001 (0.001) Elapsed 7m 34s (remain 3m 46s) Loss: 0.0389(0.1196) EVAL: [2610/3883] Data 0.001 (0.001) Elapsed 7m 37s (remain 3m 42s) Loss: 0.1480(0.1199) EVAL: [2628/3883] Data 0.001 (0.001) Elapsed 7m 40s (remain 3m 39s) Loss: 0.0215(0.1199) EVAL: [2646/3883] Data 0.001 (0.001) Elapsed 7m 43s (remain 3m 36s) Loss: 0.1375(0.1199) EVAL: [2664/3883] Data 0.001 (0.001) Elapsed 7m 47s (remain 3m 33s) Loss: 0.0885(0.1197) EVAL: [2682/3883] Data 0.001 (0.001) Elapsed 7m 50s (remain 3m 30s) Loss: 0.1115(0.1196) EVAL: [2700/3883] Data 0.001 (0.001) Elapsed 7m 53s (remain 3m 27s) Loss: 0.0283(0.1196) EVAL: [2718/3883] Data 0.001 (0.001) Elapsed 7m 56s (remain 3m 24s) Loss: 0.2116(0.1196) EVAL: 
[2736/3883] Data 0.001 (0.001) Elapsed 7m 59s (remain 3m 20s) Loss: 0.0169(0.1198) EVAL: [2754/3883] Data 0.001 (0.001) Elapsed 8m 2s (remain 3m 17s) Loss: 0.0813(0.1203) EVAL: [2772/3883] Data 0.001 (0.001) Elapsed 8m 6s (remain 3m 14s) Loss: 0.1609(0.1203) EVAL: [2790/3883] Data 0.001 (0.001) Elapsed 8m 9s (remain 3m 11s) Loss: 0.0075(0.1202) EVAL: [2808/3883] Data 0.001 (0.001) Elapsed 8m 12s (remain 3m 8s) Loss: 0.2102(0.1202) EVAL: [2826/3883] Data 0.001 (0.001) Elapsed 8m 15s (remain 3m 5s) Loss: 0.1106(0.1201) EVAL: [2844/3883] Data 0.001 (0.001) Elapsed 8m 18s (remain 3m 1s) Loss: 0.1306(0.1201) EVAL: [2862/3883] Data 0.001 (0.001) Elapsed 8m 21s (remain 2m 58s) Loss: 0.3577(0.1201) EVAL: [2880/3883] Data 0.001 (0.001) Elapsed 8m 24s (remain 2m 55s) Loss: 0.0156(0.1199) EVAL: [2898/3883] Data 0.001 (0.001) Elapsed 8m 28s (remain 2m 52s) Loss: 0.2957(0.1199) EVAL: [2916/3883] Data 0.001 (0.001) Elapsed 8m 31s (remain 2m 49s) Loss: 0.1684(0.1199) EVAL: [2934/3883] Data 0.001 (0.001) Elapsed 8m 34s (remain 2m 46s) Loss: 0.2122(0.1199) EVAL: [2952/3883] Data 0.001 (0.001) Elapsed 8m 37s (remain 2m 42s) Loss: 0.0089(0.1199) EVAL: [2970/3883] Data 0.001 (0.001) Elapsed 8m 40s (remain 2m 39s) Loss: 0.0473(0.1199) EVAL: [2988/3883] Data 0.001 (0.001) Elapsed 8m 43s (remain 2m 36s) Loss: 0.0715(0.1200) EVAL: [3006/3883] Data 0.001 (0.001) Elapsed 8m 46s (remain 2m 33s) Loss: 0.0616(0.1198) EVAL: [3024/3883] Data 0.001 (0.001) Elapsed 8m 50s (remain 2m 30s) Loss: 0.3140(0.1199) EVAL: [3042/3883] Data 0.001 (0.001) Elapsed 8m 53s (remain 2m 27s) Loss: 0.0550(0.1197) EVAL: [3060/3883] Data 0.001 (0.001) Elapsed 8m 56s (remain 2m 24s) Loss: 0.1241(0.1198) EVAL: [3078/3883] Data 0.001 (0.001) Elapsed 8m 59s (remain 2m 20s) Loss: 0.2542(0.1197) EVAL: [3096/3883] Data 0.001 (0.001) Elapsed 9m 2s (remain 2m 17s) Loss: 0.5405(0.1197) EVAL: [3114/3883] Data 0.001 (0.001) Elapsed 9m 5s (remain 2m 14s) Loss: 0.1332(0.1197) EVAL: [3132/3883] Data 0.002 (0.001) Elapsed 9m 9s 
(remain 2m 11s) Loss: 0.0933(0.1196) EVAL: [3150/3883] Data 0.001 (0.001) Elapsed 9m 12s (remain 2m 8s) Loss: 0.0652(0.1195) EVAL: [3168/3883] Data 0.001 (0.001) Elapsed 9m 15s (remain 2m 5s) Loss: 0.2345(0.1194) EVAL: [3186/3883] Data 0.001 (0.001) Elapsed 9m 18s (remain 2m 1s) Loss: 0.0110(0.1192) EVAL: [3204/3883] Data 0.001 (0.001) Elapsed 9m 21s (remain 1m 58s) Loss: 0.0253(0.1192) EVAL: [3222/3883] Data 0.001 (0.001) Elapsed 9m 24s (remain 1m 55s) Loss: 0.1718(0.1192) EVAL: [3240/3883] Data 0.001 (0.001) Elapsed 9m 27s (remain 1m 52s) Loss: 0.4883(0.1192) EVAL: [3258/3883] Data 0.001 (0.001) Elapsed 9m 31s (remain 1m 49s) Loss: 0.0197(0.1192) EVAL: [3276/3883] Data 0.001 (0.001) Elapsed 9m 34s (remain 1m 46s) Loss: 0.0398(0.1192) EVAL: [3294/3883] Data 0.001 (0.001) Elapsed 9m 37s (remain 1m 43s) Loss: 0.1589(0.1192) EVAL: [3312/3883] Data 0.001 (0.001) Elapsed 9m 40s (remain 1m 39s) Loss: 0.0030(0.1193) EVAL: [3330/3883] Data 0.001 (0.001) Elapsed 9m 43s (remain 1m 36s) Loss: 0.2455(0.1191) EVAL: [3348/3883] Data 0.001 (0.001) Elapsed 9m 46s (remain 1m 33s) Loss: 0.0854(0.1191) EVAL: [3366/3883] Data 0.001 (0.001) Elapsed 9m 49s (remain 1m 30s) Loss: 0.0114(0.1191) EVAL: [3384/3883] Data 0.001 (0.001) Elapsed 9m 53s (remain 1m 27s) Loss: 0.1482(0.1192) EVAL: [3402/3883] Data 0.001 (0.001) Elapsed 9m 56s (remain 1m 24s) Loss: 0.1423(0.1194) EVAL: [3420/3883] Data 0.001 (0.001) Elapsed 9m 59s (remain 1m 20s) Loss: 0.1642(0.1192) EVAL: [3438/3883] Data 0.001 (0.001) Elapsed 10m 2s (remain 1m 17s) Loss: 0.1889(0.1193) EVAL: [3456/3883] Data 0.001 (0.001) Elapsed 10m 5s (remain 1m 14s) Loss: 0.3119(0.1194) EVAL: [3474/3883] Data 0.001 (0.001) Elapsed 10m 8s (remain 1m 11s) Loss: 0.0278(0.1195) EVAL: [3492/3883] Data 0.001 (0.001) Elapsed 10m 12s (remain 1m 8s) Loss: 0.0927(0.1195) EVAL: [3510/3883] Data 0.001 (0.001) Elapsed 10m 15s (remain 1m 5s) Loss: 0.0438(0.1194) EVAL: [3528/3883] Data 0.001 (0.001) Elapsed 10m 18s (remain 1m 2s) Loss: 0.6840(0.1197) EVAL: 
[3546/3883] Data 0.001 (0.001) Elapsed 10m 21s (remain 0m 58s) Loss: 0.2169(0.1197) EVAL: [3564/3883] Data 0.001 (0.001) Elapsed 10m 24s (remain 0m 55s) Loss: 0.0727(0.1194) EVAL: [3582/3883] Data 0.001 (0.001) Elapsed 10m 27s (remain 0m 52s) Loss: 0.0599(0.1195) EVAL: [3600/3883] Data 0.001 (0.001) Elapsed 10m 30s (remain 0m 49s) Loss: 0.0495(0.1194) EVAL: [3618/3883] Data 0.001 (0.001) Elapsed 10m 34s (remain 0m 46s) Loss: 0.1761(0.1194) EVAL: [3636/3883] Data 0.001 (0.001) Elapsed 10m 37s (remain 0m 43s) Loss: 0.0229(0.1195) EVAL: [3654/3883] Data 0.001 (0.001) Elapsed 10m 40s (remain 0m 39s) Loss: 0.0331(0.1194) EVAL: [3672/3883] Data 0.001 (0.001) Elapsed 10m 43s (remain 0m 36s) Loss: 0.4167(0.1195) EVAL: [3690/3883] Data 0.001 (0.001) Elapsed 10m 46s (remain 0m 33s) Loss: 0.0123(0.1196) EVAL: [3708/3883] Data 0.001 (0.001) Elapsed 10m 49s (remain 0m 30s) Loss: 0.0550(0.1198) EVAL: [3726/3883] Data 0.001 (0.001) Elapsed 10m 53s (remain 0m 27s) Loss: 0.1469(0.1197) EVAL: [3744/3883] Data 0.001 (0.001) Elapsed 10m 56s (remain 0m 24s) Loss: 0.1247(0.1197) EVAL: [3762/3883] Data 0.001 (0.001) Elapsed 10m 59s (remain 0m 21s) Loss: 0.0100(0.1196) EVAL: [3780/3883] Data 0.001 (0.001) Elapsed 11m 2s (remain 0m 17s) Loss: 0.0063(0.1195) EVAL: [3798/3883] Data 0.001 (0.001) Elapsed 11m 5s (remain 0m 14s) Loss: 0.0042(0.1197) EVAL: [3816/3883] Data 0.001 (0.001) Elapsed 11m 8s (remain 0m 11s) Loss: 0.0113(0.1197) EVAL: [3834/3883] Data 0.002 (0.001) Elapsed 11m 11s (remain 0m 8s) Loss: 0.0383(0.1198) EVAL: [3852/3883] Data 0.001 (0.001) Elapsed 11m 15s (remain 0m 5s) Loss: 0.2638(0.1198) EVAL: [3870/3883] Data 0.001 (0.001) Elapsed 11m 18s (remain 0m 2s) Loss: 0.0234(0.1196)
Epoch 6 - avg_train_loss: 0.3144 avg_val_loss: 0.1197 time: 1126s Epoch 6 - AUC: 0.9630759811819294 - pAUC: 0.17557517561384284 Epoch 6 - Save Best Score: 0.1756 Model
EVAL: [3882/3883] Data 0.001 (0.001) Elapsed 11m 20s (remain 0m 0s) Loss: 0.2157(0.1197) Epoch: [7][0/650] Data 0.821 (0.821) Elapsed 0m 1s (remain 14m 21s) Loss: 0.1612(0.1612) Grad: 5.7295 LR: 0.000035 Epoch: [7][18/650] Data 0.270 (0.291) Elapsed 0m 13s (remain 7m 30s) Loss: 0.1724(0.2488) Grad: 5.6207 LR: 0.000035 Epoch: [7][36/650] Data 0.267 (0.277) Elapsed 0m 25s (remain 7m 8s) Loss: 0.7155(0.2845) Grad: 15.1702 LR: 0.000035 Epoch: [7][54/650] Data 0.269 (0.274) Elapsed 0m 38s (remain 6m 53s) Loss: 0.3322(0.2922) Grad: 12.9610 LR: 0.000035 Epoch: [7][72/650] Data 0.271 (0.272) Elapsed 0m 50s (remain 6m 39s) Loss: 0.2008(0.2959) Grad: 7.8997 LR: 0.000035 Epoch: [7][90/650] Data 0.269 (0.271) Elapsed 1m 2s (remain 6m 25s) Loss: 0.5616(0.3076) Grad: 11.5798 LR: 0.000035 Epoch: [7][108/650] Data 0.266 (0.270) Elapsed 1m 15s (remain 6m 12s) Loss: 0.5660(0.3085) Grad: 13.9055 LR: 0.000035 Epoch: [7][126/650] Data 0.266 (0.269) Elapsed 1m 27s (remain 5m 59s) Loss: 0.3743(0.3073) Grad: 8.8937 LR: 0.000035 Epoch: [7][144/650] Data 0.270 (0.269) Elapsed 1m 39s (remain 5m 47s) Loss: 0.3292(0.2989) Grad: 7.4580 LR: 0.000035 Epoch: [7][162/650] Data 0.217 (0.268) Elapsed 1m 52s (remain 5m 34s) Loss: 0.3137(0.2995) Grad: 8.5468 LR: 0.000035 Epoch: [7][180/650] Data 0.267 (0.268) Elapsed 2m 4s (remain 5m 22s) Loss: 0.3964(0.3079) Grad: 7.2522 LR: 0.000035 Epoch: [7][198/650] Data 0.267 (0.268) Elapsed 2m 16s (remain 5m 9s) Loss: 0.4010(0.3030) Grad: 10.4140 LR: 0.000035 Epoch: [7][216/650] Data 0.269 (0.268) Elapsed 2m 28s (remain 4m 57s) Loss: 0.4564(0.3009) Grad: 16.9879 LR: 0.000035 Epoch: [7][234/650] Data 0.262 (0.268) Elapsed 2m 41s (remain 4m 44s) Loss: 0.3584(0.3019) Grad: 8.7614 LR: 0.000035 Epoch: [7][252/650] Data 0.267 (0.268) Elapsed 2m 53s (remain 4m 32s) Loss: 0.1436(0.2989) Grad: 4.8792 LR: 0.000035 Epoch: [7][270/650] Data 0.263 (0.267) Elapsed 3m 5s (remain 4m 19s) Loss: 0.0901(0.2963) Grad: 3.4946 LR: 0.000035 Epoch: [7][288/650] Data 0.272 (0.266) 
Elapsed 3m 18s (remain 4m 7s) Loss: 0.1869(0.2987) Grad: 5.2454 LR: 0.000035 Epoch: [7][306/650] Data 0.264 (0.266) Elapsed 3m 30s (remain 3m 55s) Loss: 0.3532(0.2999) Grad: 12.4663 LR: 0.000035 Epoch: [7][324/650] Data 0.263 (0.266) Elapsed 3m 42s (remain 3m 42s) Loss: 0.2684(0.2972) Grad: 7.3688 LR: 0.000035 Epoch: [7][342/650] Data 0.257 (0.266) Elapsed 3m 55s (remain 3m 30s) Loss: 0.2496(0.2954) Grad: 7.3326 LR: 0.000035 Epoch: [7][360/650] Data 0.271 (0.266) Elapsed 4m 7s (remain 3m 18s) Loss: 0.1261(0.2934) Grad: 4.1121 LR: 0.000035 Epoch: [7][378/650] Data 0.270 (0.266) Elapsed 4m 19s (remain 3m 5s) Loss: 0.3463(0.2930) Grad: 10.4062 LR: 0.000035 Epoch: [7][396/650] Data 0.272 (0.266) Elapsed 4m 32s (remain 2m 53s) Loss: 0.2351(0.2914) Grad: 13.9747 LR: 0.000035 Epoch: [7][414/650] Data 0.272 (0.266) Elapsed 4m 44s (remain 2m 41s) Loss: 0.3755(0.2889) Grad: 9.9085 LR: 0.000035 Epoch: [7][432/650] Data 0.272 (0.266) Elapsed 4m 56s (remain 2m 28s) Loss: 0.5131(0.2889) Grad: 13.5715 LR: 0.000035 Epoch: [7][450/650] Data 0.267 (0.266) Elapsed 5m 9s (remain 2m 16s) Loss: 0.5115(0.2919) Grad: 14.3260 LR: 0.000035 Epoch: [7][468/650] Data 0.272 (0.266) Elapsed 5m 21s (remain 2m 4s) Loss: 0.1944(0.2926) Grad: 8.0784 LR: 0.000035 Epoch: [7][486/650] Data 0.269 (0.266) Elapsed 5m 33s (remain 1m 51s) Loss: 0.3605(0.2924) Grad: 10.9265 LR: 0.000035 Epoch: [7][504/650] Data 0.263 (0.266) Elapsed 5m 45s (remain 1m 39s) Loss: 0.2336(0.2913) Grad: 7.1656 LR: 0.000035 Epoch: [7][522/650] Data 0.259 (0.266) Elapsed 5m 58s (remain 1m 26s) Loss: 0.0984(0.2902) Grad: 3.6238 LR: 0.000035 Epoch: [7][540/650] Data 0.271 (0.266) Elapsed 6m 10s (remain 1m 14s) Loss: 0.2180(0.2894) Grad: 6.0811 LR: 0.000035 Epoch: [7][558/650] Data 0.270 (0.266) Elapsed 6m 22s (remain 1m 2s) Loss: 0.2116(0.2892) Grad: 6.2230 LR: 0.000035 Epoch: [7][576/650] Data 0.238 (0.266) Elapsed 6m 35s (remain 0m 49s) Loss: 0.1135(0.2884) Grad: 4.9512 LR: 0.000035 Epoch: [7][594/650] Data 0.272 (0.266) Elapsed 6m 
47s (remain 0m 37s) Loss: 0.2819(0.2886) Grad: 8.3961 LR: 0.000035 Epoch: [7][612/650] Data 0.260 (0.266) Elapsed 6m 59s (remain 0m 25s) Loss: 0.1868(0.2891) Grad: 6.7516 LR: 0.000035 Epoch: [7][630/650] Data 0.272 (0.266) Elapsed 7m 12s (remain 0m 13s) Loss: 0.8211(0.2892) Grad: 15.7491 LR: 0.000035 Epoch: [7][648/650] Data 0.272 (0.266) Elapsed 7m 24s (remain 0m 0s) Loss: 0.2454(0.2885) Grad: 5.6429 LR: 0.000035 Epoch: [7][649/650] Data 0.273 (0.266) Elapsed 7m 25s (remain 0m 0s) Loss: 0.3781(0.2886) Grad: 8.3929 LR: 0.000035 EVAL: [0/3883] Data 0.475 (0.475) Elapsed 0m 0s (remain 42m 15s) Loss: 0.1448(0.1448) EVAL: [18/3883] Data 0.001 (0.026) Elapsed 0m 3s (remain 12m 56s) Loss: 0.1257(0.1032) EVAL: [36/3883] Data 0.001 (0.014) Elapsed 0m 6s (remain 12m 4s) Loss: 0.0713(0.1238) EVAL: [54/3883] Data 0.001 (0.010) Elapsed 0m 10s (remain 11m 44s) Loss: 0.0202(0.1184) EVAL: [72/3883] Data 0.001 (0.008) Elapsed 0m 13s (remain 11m 32s) Loss: 0.2557(0.1350) EVAL: [90/3883] Data 0.001 (0.006) Elapsed 0m 16s (remain 11m 24s) Loss: 0.3096(0.1405) EVAL: [108/3883] Data 0.001 (0.005) Elapsed 0m 19s (remain 11m 17s) Loss: 0.2121(0.1443) EVAL: [126/3883] Data 0.001 (0.005) Elapsed 0m 22s (remain 11m 11s) Loss: 0.3703(0.1498) EVAL: [144/3883] Data 0.001 (0.004) Elapsed 0m 25s (remain 11m 6s) Loss: 0.2624(0.1474) EVAL: [162/3883] Data 0.001 (0.004) Elapsed 0m 29s (remain 11m 2s) Loss: 0.0421(0.1482) EVAL: [180/3883] Data 0.001 (0.004) Elapsed 0m 32s (remain 10m 57s) Loss: 0.0147(0.1443) EVAL: [198/3883] Data 0.001 (0.003) Elapsed 0m 35s (remain 10m 53s) Loss: 0.0701(0.1475) EVAL: [216/3883] Data 0.001 (0.003) Elapsed 0m 38s (remain 10m 49s) Loss: 0.0716(0.1474) EVAL: [234/3883] Data 0.001 (0.003) Elapsed 0m 41s (remain 10m 46s) Loss: 0.1318(0.1460) EVAL: [252/3883] Data 0.001 (0.003) Elapsed 0m 44s (remain 10m 42s) Loss: 0.1274(0.1428) EVAL: [270/3883] Data 0.002 (0.003) Elapsed 0m 47s (remain 10m 38s) Loss: 0.0621(0.1451) EVAL: [288/3883] Data 0.001 (0.003) Elapsed 0m 51s 
(remain 10m 35s) Loss: 0.3662(0.1474) EVAL: [306/3883] Data 0.001 (0.002) Elapsed 0m 54s (remain 10m 31s) Loss: 0.0571(0.1467) EVAL: [324/3883] Data 0.001 (0.002) Elapsed 0m 57s (remain 10m 28s) Loss: 0.0643(0.1469) EVAL: [342/3883] Data 0.001 (0.002) Elapsed 1m 0s (remain 10m 24s) Loss: 0.0230(0.1488) EVAL: [360/3883] Data 0.001 (0.002) Elapsed 1m 3s (remain 10m 21s) Loss: 0.1482(0.1486) EVAL: [378/3883] Data 0.001 (0.002) Elapsed 1m 6s (remain 10m 17s) Loss: 0.0814(0.1489) EVAL: [396/3883] Data 0.001 (0.002) Elapsed 1m 9s (remain 10m 14s) Loss: 0.3477(0.1487) EVAL: [414/3883] Data 0.001 (0.002) Elapsed 1m 13s (remain 10m 10s) Loss: 0.1741(0.1494) EVAL: [432/3883] Data 0.002 (0.002) Elapsed 1m 16s (remain 10m 7s) Loss: 0.0966(0.1498) EVAL: [450/3883] Data 0.001 (0.002) Elapsed 1m 19s (remain 10m 4s) Loss: 0.1760(0.1502) EVAL: [468/3883] Data 0.001 (0.002) Elapsed 1m 22s (remain 10m 1s) Loss: 0.0746(0.1515) EVAL: [486/3883] Data 0.001 (0.002) Elapsed 1m 25s (remain 9m 57s) Loss: 0.3365(0.1511) EVAL: [504/3883] Data 0.001 (0.002) Elapsed 1m 28s (remain 9m 54s) Loss: 0.0790(0.1513) EVAL: [522/3883] Data 0.001 (0.002) Elapsed 1m 32s (remain 9m 51s) Loss: 0.3282(0.1508) EVAL: [540/3883] Data 0.001 (0.002) Elapsed 1m 35s (remain 9m 47s) Loss: 0.0916(0.1495) EVAL: [558/3883] Data 0.001 (0.002) Elapsed 1m 38s (remain 9m 44s) Loss: 0.3884(0.1496) EVAL: [576/3883] Data 0.001 (0.002) Elapsed 1m 41s (remain 9m 41s) Loss: 0.0251(0.1486) EVAL: [594/3883] Data 0.001 (0.002) Elapsed 1m 44s (remain 9m 38s) Loss: 0.1690(0.1478) EVAL: [612/3883] Data 0.001 (0.002) Elapsed 1m 47s (remain 9m 34s) Loss: 0.0962(0.1473) EVAL: [630/3883] Data 0.002 (0.002) Elapsed 1m 50s (remain 9m 31s) Loss: 0.0494(0.1470) EVAL: [648/3883] Data 0.001 (0.002) Elapsed 1m 54s (remain 9m 28s) Loss: 0.0699(0.1469) EVAL: [666/3883] Data 0.001 (0.002) Elapsed 1m 57s (remain 9m 25s) Loss: 0.1903(0.1467) EVAL: [684/3883] Data 0.001 (0.002) Elapsed 2m 0s (remain 9m 21s) Loss: 0.2393(0.1472) EVAL: [702/3883] Data 
0.001 (0.002) Elapsed 2m 3s (remain 9m 18s) Loss: 0.1916(0.1456) EVAL: [720/3883] Data 0.001 (0.002) Elapsed 2m 6s (remain 9m 15s) Loss: 0.1720(0.1450) EVAL: [738/3883] Data 0.001 (0.002) Elapsed 2m 9s (remain 9m 12s) Loss: 0.0194(0.1455) EVAL: [756/3883] Data 0.001 (0.002) Elapsed 2m 12s (remain 9m 9s) Loss: 0.1851(0.1455) EVAL: [774/3883] Data 0.001 (0.002) Elapsed 2m 16s (remain 9m 5s) Loss: 0.0723(0.1450) EVAL: [792/3883] Data 0.003 (0.002) Elapsed 2m 19s (remain 9m 2s) Loss: 0.1385(0.1455) EVAL: [810/3883] Data 0.001 (0.002) Elapsed 2m 22s (remain 8m 59s) Loss: 0.0397(0.1455) EVAL: [828/3883] Data 0.001 (0.002) Elapsed 2m 25s (remain 8m 56s) Loss: 0.0311(0.1457) EVAL: [846/3883] Data 0.001 (0.002) Elapsed 2m 28s (remain 8m 53s) Loss: 0.0646(0.1459) EVAL: [864/3883] Data 0.001 (0.001) Elapsed 2m 31s (remain 8m 49s) Loss: 0.3254(0.1465) EVAL: [882/3883] Data 0.001 (0.001) Elapsed 2m 35s (remain 8m 46s) Loss: 0.0056(0.1465) EVAL: [900/3883] Data 0.001 (0.001) Elapsed 2m 38s (remain 8m 43s) Loss: 0.2954(0.1461) EVAL: [918/3883] Data 0.001 (0.001) Elapsed 2m 41s (remain 8m 40s) Loss: 0.1702(0.1460) EVAL: [936/3883] Data 0.001 (0.001) Elapsed 2m 44s (remain 8m 37s) Loss: 0.2105(0.1462) EVAL: [954/3883] Data 0.001 (0.001) Elapsed 2m 47s (remain 8m 33s) Loss: 0.0736(0.1458) EVAL: [972/3883] Data 0.001 (0.001) Elapsed 2m 50s (remain 8m 30s) Loss: 0.0447(0.1460) EVAL: [990/3883] Data 0.001 (0.001) Elapsed 2m 53s (remain 8m 27s) Loss: 0.1096(0.1455) EVAL: [1008/3883] Data 0.001 (0.001) Elapsed 2m 57s (remain 8m 24s) Loss: 0.3416(0.1460) EVAL: [1026/3883] Data 0.001 (0.001) Elapsed 3m 0s (remain 8m 21s) Loss: 0.0340(0.1453) EVAL: [1044/3883] Data 0.001 (0.001) Elapsed 3m 3s (remain 8m 18s) Loss: 0.0195(0.1453) EVAL: [1062/3883] Data 0.001 (0.001) Elapsed 3m 6s (remain 8m 14s) Loss: 0.0971(0.1450) EVAL: [1080/3883] Data 0.001 (0.001) Elapsed 3m 9s (remain 8m 11s) Loss: 0.1188(0.1450) EVAL: [1098/3883] Data 0.001 (0.001) Elapsed 3m 12s (remain 8m 8s) Loss: 0.1510(0.1446) 
EVAL: [1116/3883] Data 0.001 (0.001) Elapsed 3m 15s (remain 8m 5s) Loss: 0.1118(0.1447) EVAL: [1134/3883] Data 0.001 (0.001) Elapsed 3m 19s (remain 8m 2s) Loss: 0.0248(0.1443) EVAL: [1152/3883] Data 0.001 (0.001) Elapsed 3m 22s (remain 7m 58s) Loss: 0.1688(0.1442) EVAL: [1170/3883] Data 0.001 (0.001) Elapsed 3m 25s (remain 7m 55s) Loss: 0.0970(0.1436) EVAL: [1188/3883] Data 0.001 (0.001) Elapsed 3m 28s (remain 7m 52s) Loss: 0.1000(0.1437) EVAL: [1206/3883] Data 0.001 (0.001) Elapsed 3m 31s (remain 7m 49s) Loss: 0.0272(0.1434) EVAL: [1224/3883] Data 0.001 (0.001) Elapsed 3m 34s (remain 7m 46s) Loss: 0.1667(0.1436) EVAL: [1242/3883] Data 0.001 (0.001) Elapsed 3m 38s (remain 7m 43s) Loss: 0.1179(0.1437) EVAL: [1260/3883] Data 0.002 (0.001) Elapsed 3m 41s (remain 7m 39s) Loss: 0.5210(0.1441) EVAL: [1278/3883] Data 0.001 (0.001) Elapsed 3m 44s (remain 7m 36s) Loss: 0.2604(0.1446) EVAL: [1296/3883] Data 0.002 (0.001) Elapsed 3m 47s (remain 7m 33s) Loss: 0.0227(0.1448) EVAL: [1314/3883] Data 0.001 (0.001) Elapsed 3m 50s (remain 7m 30s) Loss: 0.0513(0.1452) EVAL: [1332/3883] Data 0.002 (0.001) Elapsed 3m 53s (remain 7m 27s) Loss: 0.1692(0.1449) EVAL: [1350/3883] Data 0.001 (0.001) Elapsed 3m 56s (remain 7m 24s) Loss: 0.0225(0.1450) EVAL: [1368/3883] Data 0.001 (0.001) Elapsed 4m 0s (remain 7m 20s) Loss: 0.3262(0.1457) EVAL: [1386/3883] Data 0.001 (0.001) Elapsed 4m 3s (remain 7m 17s) Loss: 0.0735(0.1454) EVAL: [1404/3883] Data 0.001 (0.001) Elapsed 4m 6s (remain 7m 14s) Loss: 0.0821(0.1455) EVAL: [1422/3883] Data 0.001 (0.001) Elapsed 4m 9s (remain 7m 11s) Loss: 0.1582(0.1456) EVAL: [1440/3883] Data 0.002 (0.001) Elapsed 4m 12s (remain 7m 8s) Loss: 0.1308(0.1451) EVAL: [1458/3883] Data 0.001 (0.001) Elapsed 4m 15s (remain 7m 5s) Loss: 0.0395(0.1450) EVAL: [1476/3883] Data 0.001 (0.001) Elapsed 4m 18s (remain 7m 1s) Loss: 0.0714(0.1448) EVAL: [1494/3883] Data 0.001 (0.001) Elapsed 4m 22s (remain 6m 58s) Loss: 0.2972(0.1446) EVAL: [1512/3883] Data 0.004 (0.001) Elapsed 4m 
25s (remain 6m 55s) Loss: 0.3462(0.1445) EVAL: [1530/3883] Data 0.001 (0.001) Elapsed 4m 28s (remain 6m 52s) Loss: 0.3383(0.1448) EVAL: [1548/3883] Data 0.001 (0.001) Elapsed 4m 31s (remain 6m 49s) Loss: 0.0908(0.1445) EVAL: [1566/3883] Data 0.001 (0.001) Elapsed 4m 34s (remain 6m 46s) Loss: 0.2241(0.1442) EVAL: [1584/3883] Data 0.001 (0.001) Elapsed 4m 37s (remain 6m 42s) Loss: 0.0186(0.1442) EVAL: [1602/3883] Data 0.001 (0.001) Elapsed 4m 41s (remain 6m 39s) Loss: 0.3535(0.1444) EVAL: [1620/3883] Data 0.001 (0.001) Elapsed 4m 44s (remain 6m 36s) Loss: 0.2238(0.1451) EVAL: [1638/3883] Data 0.001 (0.001) Elapsed 4m 47s (remain 6m 33s) Loss: 0.2715(0.1454) EVAL: [1656/3883] Data 0.001 (0.001) Elapsed 4m 50s (remain 6m 30s) Loss: 0.1248(0.1454) EVAL: [1674/3883] Data 0.001 (0.001) Elapsed 4m 53s (remain 6m 27s) Loss: 0.2577(0.1455) EVAL: [1692/3883] Data 0.001 (0.001) Elapsed 4m 56s (remain 6m 23s) Loss: 0.0355(0.1452) EVAL: [1710/3883] Data 0.001 (0.001) Elapsed 4m 59s (remain 6m 20s) Loss: 0.0729(0.1455) EVAL: [1728/3883] Data 0.001 (0.001) Elapsed 5m 3s (remain 6m 17s) Loss: 0.0598(0.1456) EVAL: [1746/3883] Data 0.001 (0.001) Elapsed 5m 6s (remain 6m 14s) Loss: 0.1460(0.1455) EVAL: [1764/3883] Data 0.001 (0.001) Elapsed 5m 9s (remain 6m 11s) Loss: 0.1734(0.1458) EVAL: [1782/3883] Data 0.001 (0.001) Elapsed 5m 12s (remain 6m 8s) Loss: 0.4779(0.1459) EVAL: [1800/3883] Data 0.001 (0.001) Elapsed 5m 15s (remain 6m 4s) Loss: 0.2189(0.1460) EVAL: [1818/3883] Data 0.001 (0.001) Elapsed 5m 18s (remain 6m 1s) Loss: 0.0204(0.1459) EVAL: [1836/3883] Data 0.001 (0.001) Elapsed 5m 21s (remain 5m 58s) Loss: 0.0513(0.1462) EVAL: [1854/3883] Data 0.001 (0.001) Elapsed 5m 25s (remain 5m 55s) Loss: 0.0081(0.1463) EVAL: [1872/3883] Data 0.001 (0.001) Elapsed 5m 28s (remain 5m 52s) Loss: 0.3009(0.1462) EVAL: [1890/3883] Data 0.001 (0.001) Elapsed 5m 31s (remain 5m 49s) Loss: 0.0316(0.1462) EVAL: [1908/3883] Data 0.001 (0.001) Elapsed 5m 34s (remain 5m 45s) Loss: 0.1280(0.1462) EVAL: 
[1926/3883] Data 0.001 (0.001) Elapsed 5m 37s (remain 5m 42s) Loss: 0.0212(0.1461) EVAL: [1944/3883] Data 0.001 (0.001) Elapsed 5m 40s (remain 5m 39s) Loss: 0.0161(0.1462) EVAL: [1962/3883] Data 0.001 (0.001) Elapsed 5m 44s (remain 5m 36s) Loss: 0.2127(0.1466) EVAL: [1980/3883] Data 0.001 (0.001) Elapsed 5m 47s (remain 5m 33s) Loss: 0.2378(0.1466) EVAL: [1998/3883] Data 0.001 (0.001) Elapsed 5m 50s (remain 5m 30s) Loss: 0.1003(0.1466) EVAL: [2016/3883] Data 0.001 (0.001) Elapsed 5m 53s (remain 5m 27s) Loss: 0.0159(0.1469) EVAL: [2034/3883] Data 0.001 (0.001) Elapsed 5m 56s (remain 5m 23s) Loss: 0.0318(0.1468) EVAL: [2052/3883] Data 0.001 (0.001) Elapsed 5m 59s (remain 5m 20s) Loss: 0.3910(0.1466) EVAL: [2070/3883] Data 0.001 (0.001) Elapsed 6m 2s (remain 5m 17s) Loss: 0.3007(0.1470) EVAL: [2088/3883] Data 0.002 (0.001) Elapsed 6m 6s (remain 5m 14s) Loss: 0.0154(0.1467) EVAL: [2106/3883] Data 0.001 (0.001) Elapsed 6m 9s (remain 5m 11s) Loss: 0.1147(0.1466) EVAL: [2124/3883] Data 0.001 (0.001) Elapsed 6m 12s (remain 5m 8s) Loss: 0.0700(0.1461) EVAL: [2142/3883] Data 0.001 (0.001) Elapsed 6m 15s (remain 5m 4s) Loss: 0.0586(0.1460) EVAL: [2160/3883] Data 0.001 (0.001) Elapsed 6m 18s (remain 5m 1s) Loss: 0.1286(0.1456) EVAL: [2178/3883] Data 0.001 (0.001) Elapsed 6m 21s (remain 4m 58s) Loss: 0.0095(0.1453) EVAL: [2196/3883] Data 0.001 (0.001) Elapsed 6m 25s (remain 4m 55s) Loss: 0.0699(0.1452) EVAL: [2214/3883] Data 0.001 (0.001) Elapsed 6m 28s (remain 4m 52s) Loss: 0.1051(0.1452) EVAL: [2232/3883] Data 0.001 (0.001) Elapsed 6m 31s (remain 4m 49s) Loss: 0.2346(0.1453) EVAL: [2250/3883] Data 0.001 (0.001) Elapsed 6m 34s (remain 4m 45s) Loss: 0.6499(0.1455) EVAL: [2268/3883] Data 0.001 (0.001) Elapsed 6m 37s (remain 4m 42s) Loss: 0.0751(0.1457) EVAL: [2286/3883] Data 0.001 (0.001) Elapsed 6m 40s (remain 4m 39s) Loss: 0.2255(0.1457) EVAL: [2304/3883] Data 0.001 (0.001) Elapsed 6m 43s (remain 4m 36s) Loss: 0.2285(0.1456) EVAL: [2322/3883] Data 0.001 (0.001) Elapsed 6m 47s 
(remain 4m 33s) Loss: 0.0221(0.1455) EVAL: [2340/3883] Data 0.001 (0.001) Elapsed 6m 50s (remain 4m 30s) Loss: 0.1367(0.1455) EVAL: [2358/3883] Data 0.001 (0.001) Elapsed 6m 53s (remain 4m 27s) Loss: 0.3123(0.1453) EVAL: [2376/3883] Data 0.001 (0.001) Elapsed 6m 56s (remain 4m 23s) Loss: 0.2926(0.1455) EVAL: [2394/3883] Data 0.001 (0.001) Elapsed 6m 59s (remain 4m 20s) Loss: 0.3914(0.1453) EVAL: [2412/3883] Data 0.002 (0.001) Elapsed 7m 2s (remain 4m 17s) Loss: 0.2428(0.1452) EVAL: [2430/3883] Data 0.001 (0.001) Elapsed 7m 5s (remain 4m 14s) Loss: 0.0446(0.1450) EVAL: [2448/3883] Data 0.001 (0.001) Elapsed 7m 9s (remain 4m 11s) Loss: 0.0977(0.1447) EVAL: [2466/3883] Data 0.001 (0.001) Elapsed 7m 12s (remain 4m 8s) Loss: 0.0916(0.1446) EVAL: [2484/3883] Data 0.001 (0.001) Elapsed 7m 15s (remain 4m 4s) Loss: 0.3714(0.1447) EVAL: [2502/3883] Data 0.001 (0.001) Elapsed 7m 18s (remain 4m 1s) Loss: 0.0582(0.1450) EVAL: [2520/3883] Data 0.001 (0.001) Elapsed 7m 21s (remain 3m 58s) Loss: 0.0471(0.1446) EVAL: [2538/3883] Data 0.001 (0.001) Elapsed 7m 24s (remain 3m 55s) Loss: 0.0880(0.1450) EVAL: [2556/3883] Data 0.001 (0.001) Elapsed 7m 28s (remain 3m 52s) Loss: 0.5781(0.1451) EVAL: [2574/3883] Data 0.001 (0.001) Elapsed 7m 31s (remain 3m 49s) Loss: 0.6286(0.1451) EVAL: [2592/3883] Data 0.001 (0.001) Elapsed 7m 34s (remain 3m 46s) Loss: 0.0596(0.1453) EVAL: [2610/3883] Data 0.002 (0.001) Elapsed 7m 37s (remain 3m 42s) Loss: 0.1816(0.1455) EVAL: [2628/3883] Data 0.001 (0.001) Elapsed 7m 40s (remain 3m 39s) Loss: 0.0436(0.1456) EVAL: [2646/3883] Data 0.001 (0.001) Elapsed 7m 43s (remain 3m 36s) Loss: 0.2813(0.1454) EVAL: [2664/3883] Data 0.001 (0.001) Elapsed 7m 46s (remain 3m 33s) Loss: 0.1067(0.1453) EVAL: [2682/3883] Data 0.001 (0.001) Elapsed 7m 50s (remain 3m 30s) Loss: 0.1723(0.1452) EVAL: [2700/3883] Data 0.001 (0.001) Elapsed 7m 53s (remain 3m 27s) Loss: 0.0946(0.1452) EVAL: [2718/3883] Data 0.001 (0.001) Elapsed 7m 56s (remain 3m 23s) Loss: 0.2067(0.1452) EVAL: 
[2736/3883] Data 0.001 (0.001) Elapsed 7m 59s (remain 3m 20s) Loss: 0.0383(0.1454) EVAL: [2754/3883] Data 0.001 (0.001) Elapsed 8m 2s (remain 3m 17s) Loss: 0.0946(0.1459) EVAL: [2772/3883] Data 0.002 (0.001) Elapsed 8m 5s (remain 3m 14s) Loss: 0.1875(0.1461) EVAL: [2790/3883] Data 0.001 (0.001) Elapsed 8m 8s (remain 3m 11s) Loss: 0.0398(0.1460) EVAL: [2808/3883] Data 0.001 (0.001) Elapsed 8m 12s (remain 3m 8s) Loss: 0.2054(0.1460) EVAL: [2826/3883] Data 0.001 (0.001) Elapsed 8m 15s (remain 3m 5s) Loss: 0.1293(0.1458) EVAL: [2844/3883] Data 0.001 (0.001) Elapsed 8m 18s (remain 3m 1s) Loss: 0.1667(0.1458) EVAL: [2862/3883] Data 0.001 (0.001) Elapsed 8m 21s (remain 2m 58s) Loss: 0.3024(0.1458) EVAL: [2880/3883] Data 0.001 (0.001) Elapsed 8m 24s (remain 2m 55s) Loss: 0.0246(0.1455) EVAL: [2898/3883] Data 0.001 (0.001) Elapsed 8m 27s (remain 2m 52s) Loss: 0.2965(0.1456) EVAL: [2916/3883] Data 0.001 (0.001) Elapsed 8m 31s (remain 2m 49s) Loss: 0.2060(0.1456) EVAL: [2934/3883] Data 0.001 (0.001) Elapsed 8m 34s (remain 2m 46s) Loss: 0.2619(0.1456) EVAL: [2952/3883] Data 0.001 (0.001) Elapsed 8m 37s (remain 2m 42s) Loss: 0.0173(0.1456) EVAL: [2970/3883] Data 0.001 (0.001) Elapsed 8m 40s (remain 2m 39s) Loss: 0.0544(0.1455) EVAL: [2988/3883] Data 0.001 (0.001) Elapsed 8m 43s (remain 2m 36s) Loss: 0.0998(0.1456) EVAL: [3006/3883] Data 0.001 (0.001) Elapsed 8m 46s (remain 2m 33s) Loss: 0.1103(0.1455) EVAL: [3024/3883] Data 0.001 (0.001) Elapsed 8m 49s (remain 2m 30s) Loss: 0.3508(0.1456) EVAL: [3042/3883] Data 0.001 (0.001) Elapsed 8m 53s (remain 2m 27s) Loss: 0.0529(0.1454) EVAL: [3060/3883] Data 0.001 (0.001) Elapsed 8m 56s (remain 2m 24s) Loss: 0.1617(0.1455) EVAL: [3078/3883] Data 0.001 (0.001) Elapsed 8m 59s (remain 2m 20s) Loss: 0.2239(0.1455) EVAL: [3096/3883] Data 0.001 (0.001) Elapsed 9m 2s (remain 2m 17s) Loss: 0.6479(0.1456) EVAL: [3114/3883] Data 0.001 (0.001) Elapsed 9m 5s (remain 2m 14s) Loss: 0.1604(0.1455) EVAL: [3132/3883] Data 0.001 (0.001) Elapsed 9m 8s 
(remain 2m 11s) Loss: 0.0970(0.1454) EVAL: [3150/3883] Data 0.001 (0.001) Elapsed 9m 11s (remain 2m 8s) Loss: 0.0931(0.1453) EVAL: [3168/3883] Data 0.001 (0.001) Elapsed 9m 15s (remain 2m 5s) Loss: 0.3334(0.1452) EVAL: [3186/3883] Data 0.001 (0.001) Elapsed 9m 18s (remain 2m 1s) Loss: 0.0168(0.1450) EVAL: [3204/3883] Data 0.001 (0.001) Elapsed 9m 21s (remain 1m 58s) Loss: 0.0597(0.1450) EVAL: [3222/3883] Data 0.001 (0.001) Elapsed 9m 24s (remain 1m 55s) Loss: 0.3116(0.1450) EVAL: [3240/3883] Data 0.001 (0.001) Elapsed 9m 27s (remain 1m 52s) Loss: 0.5216(0.1451) EVAL: [3258/3883] Data 0.001 (0.001) Elapsed 9m 30s (remain 1m 49s) Loss: 0.0224(0.1450) EVAL: [3276/3883] Data 0.001 (0.001) Elapsed 9m 34s (remain 1m 46s) Loss: 0.1087(0.1449) EVAL: [3294/3883] Data 0.001 (0.001) Elapsed 9m 37s (remain 1m 43s) Loss: 0.1454(0.1449) EVAL: [3312/3883] Data 0.001 (0.001) Elapsed 9m 40s (remain 1m 39s) Loss: 0.0083(0.1450) EVAL: [3330/3883] Data 0.001 (0.001) Elapsed 9m 43s (remain 1m 36s) Loss: 0.2845(0.1448) EVAL: [3348/3883] Data 0.001 (0.001) Elapsed 9m 46s (remain 1m 33s) Loss: 0.0731(0.1449) EVAL: [3366/3883] Data 0.001 (0.001) Elapsed 9m 49s (remain 1m 30s) Loss: 0.0281(0.1448) EVAL: [3384/3883] Data 0.001 (0.001) Elapsed 9m 52s (remain 1m 27s) Loss: 0.1612(0.1449) EVAL: [3402/3883] Data 0.001 (0.001) Elapsed 9m 56s (remain 1m 24s) Loss: 0.1911(0.1450) EVAL: [3420/3883] Data 0.001 (0.001) Elapsed 9m 59s (remain 1m 20s) Loss: 0.2325(0.1449) EVAL: [3438/3883] Data 0.001 (0.001) Elapsed 10m 2s (remain 1m 17s) Loss: 0.2356(0.1451) EVAL: [3456/3883] Data 0.001 (0.001) Elapsed 10m 5s (remain 1m 14s) Loss: 0.3399(0.1453) EVAL: [3474/3883] Data 0.001 (0.001) Elapsed 10m 8s (remain 1m 11s) Loss: 0.0534(0.1453) EVAL: [3492/3883] Data 0.001 (0.001) Elapsed 10m 11s (remain 1m 8s) Loss: 0.1249(0.1454) EVAL: [3510/3883] Data 0.001 (0.001) Elapsed 10m 15s (remain 1m 5s) Loss: 0.0685(0.1453) EVAL: [3528/3883] Data 0.001 (0.001) Elapsed 10m 18s (remain 1m 2s) Loss: 0.7123(0.1456) EVAL: 
[3546/3883] Data 0.001 (0.001) Elapsed 10m 21s (remain 0m 58s) Loss: 0.2773(0.1456) EVAL: [3564/3883] Data 0.001 (0.001) Elapsed 10m 24s (remain 0m 55s) Loss: 0.1855(0.1454) EVAL: [3582/3883] Data 0.001 (0.001) Elapsed 10m 27s (remain 0m 52s) Loss: 0.0971(0.1454) EVAL: [3600/3883] Data 0.001 (0.001) Elapsed 10m 30s (remain 0m 49s) Loss: 0.0804(0.1454) EVAL: [3618/3883] Data 0.001 (0.001) Elapsed 10m 33s (remain 0m 46s) Loss: 0.2167(0.1454) EVAL: [3636/3883] Data 0.001 (0.001) Elapsed 10m 37s (remain 0m 43s) Loss: 0.0445(0.1455) EVAL: [3654/3883] Data 0.001 (0.001) Elapsed 10m 40s (remain 0m 39s) Loss: 0.0644(0.1454) EVAL: [3672/3883] Data 0.001 (0.001) Elapsed 10m 43s (remain 0m 36s) Loss: 0.4958(0.1456) EVAL: [3690/3883] Data 0.001 (0.001) Elapsed 10m 46s (remain 0m 33s) Loss: 0.0485(0.1457) EVAL: [3708/3883] Data 0.001 (0.001) Elapsed 10m 49s (remain 0m 30s) Loss: 0.0757(0.1458) EVAL: [3726/3883] Data 0.001 (0.001) Elapsed 10m 52s (remain 0m 27s) Loss: 0.2444(0.1458) EVAL: [3744/3883] Data 0.002 (0.001) Elapsed 10m 56s (remain 0m 24s) Loss: 0.1375(0.1458) EVAL: [3762/3883] Data 0.001 (0.001) Elapsed 10m 59s (remain 0m 21s) Loss: 0.0313(0.1457) EVAL: [3780/3883] Data 0.001 (0.001) Elapsed 11m 2s (remain 0m 17s) Loss: 0.0116(0.1456) EVAL: [3798/3883] Data 0.001 (0.001) Elapsed 11m 5s (remain 0m 14s) Loss: 0.0133(0.1457) EVAL: [3816/3883] Data 0.001 (0.001) Elapsed 11m 8s (remain 0m 11s) Loss: 0.0298(0.1457) EVAL: [3834/3883] Data 0.001 (0.001) Elapsed 11m 11s (remain 0m 8s) Loss: 0.0490(0.1458) EVAL: [3852/3883] Data 0.001 (0.001) Elapsed 11m 14s (remain 0m 5s) Loss: 0.3004(0.1458) EVAL: [3870/3883] Data 0.001 (0.001) Elapsed 11m 18s (remain 0m 2s) Loss: 0.0393(0.1456)
Epoch 7 - avg_train_loss: 0.2886 avg_val_loss: 0.1456 time: 1126s Epoch 7 - AUC: 0.9630336620051126 - pAUC: 0.17501879658868763
EVAL: [3882/3883] Data 0.001 (0.001) Elapsed 11m 20s (remain 0m 0s) Loss: 0.4468(0.1456) Epoch: [8][0/650] Data 0.873 (0.873) Elapsed 0m 1s (remain 14m 20s) Loss: 0.2792(0.2792) Grad: 10.2268 LR: 0.000021 Epoch: [8][18/650] Data 0.246 (0.294) Elapsed 0m 13s (remain 7m 31s) Loss: 0.0727(0.2962) Grad: 4.0751 LR: 0.000021 Epoch: [8][36/650] Data 0.263 (0.281) Elapsed 0m 25s (remain 7m 9s) Loss: 0.4277(0.2959) Grad: 9.9074 LR: 0.000021 Epoch: [8][54/650] Data 0.271 (0.276) Elapsed 0m 38s (remain 6m 53s) Loss: 0.3205(0.3050) Grad: 11.2686 LR: 0.000021 Epoch: [8][72/650] Data 0.268 (0.273) Elapsed 0m 50s (remain 6m 39s) Loss: 0.1730(0.3062) Grad: 8.0313 LR: 0.000021 Epoch: [8][90/650] Data 0.269 (0.272) Elapsed 1m 2s (remain 6m 25s) Loss: 0.3141(0.3067) Grad: 11.0121 LR: 0.000021 Epoch: [8][108/650] Data 0.270 (0.271) Elapsed 1m 15s (remain 6m 12s) Loss: 0.2129(0.3038) Grad: 10.4008 LR: 0.000021 Epoch: [8][126/650] Data 0.271 (0.271) Elapsed 1m 27s (remain 6m 0s) Loss: 0.5752(0.3019) Grad: 10.9308 LR: 0.000021 Epoch: [8][144/650] Data 0.272 (0.270) Elapsed 1m 39s (remain 5m 47s) Loss: 0.2908(0.2975) Grad: 10.3126 LR: 0.000021 Epoch: [8][162/650] Data 0.265 (0.270) Elapsed 1m 52s (remain 5m 34s) Loss: 0.3415(0.2936) Grad: 8.3322 LR: 0.000021 Epoch: [8][180/650] Data 0.260 (0.269) Elapsed 2m 4s (remain 5m 22s) Loss: 0.2925(0.3028) Grad: 6.4062 LR: 0.000021 Epoch: [8][198/650] Data 0.264 (0.269) Elapsed 2m 16s (remain 5m 9s) Loss: 0.2568(0.3009) Grad: 9.4483 LR: 0.000021 Epoch: [8][216/650] Data 0.271 (0.269) Elapsed 2m 28s (remain 4m 57s) Loss: 0.5663(0.2988) Grad: 11.4609 LR: 0.000021 Epoch: [8][234/650] Data 0.267 (0.268) Elapsed 2m 41s (remain 4m 44s) Loss: 0.2216(0.3038) Grad: 7.8575 LR: 0.000021 Epoch: [8][252/650] Data 0.268 (0.268) Elapsed 2m 53s (remain 4m 32s) Loss: 0.1475(0.2997) Grad: 4.8561 LR: 0.000021 Epoch: [8][270/650] Data 0.257 (0.268) Elapsed 3m 5s (remain 4m 20s) Loss: 0.1878(0.2969) Grad: 6.0573 LR: 0.000021 Epoch: [8][288/650] Data 0.245 (0.268) 
Elapsed 3m 18s (remain 4m 7s) Loss: 0.1843(0.2954) Grad: 7.6893 LR: 0.000021 Epoch: [8][306/650] Data 0.270 (0.268) Elapsed 3m 30s (remain 3m 55s) Loss: 0.2635(0.2950) Grad: 9.8591 LR: 0.000021 Epoch: [8][324/650] Data 0.265 (0.268) Elapsed 3m 42s (remain 3m 42s) Loss: 0.1655(0.2929) Grad: 6.1340 LR: 0.000021 Epoch: [8][342/650] Data 0.271 (0.268) Elapsed 3m 55s (remain 3m 30s) Loss: 0.4283(0.2915) Grad: 14.1310 LR: 0.000021 Epoch: [8][360/650] Data 0.267 (0.267) Elapsed 4m 7s (remain 3m 18s) Loss: 0.2797(0.2907) Grad: 10.3024 LR: 0.000021 Epoch: [8][378/650] Data 0.264 (0.267) Elapsed 4m 19s (remain 3m 5s) Loss: 0.1454(0.2892) Grad: 6.6493 LR: 0.000021 Epoch: [8][396/650] Data 0.266 (0.267) Elapsed 4m 32s (remain 2m 53s) Loss: 0.3242(0.2889) Grad: 10.5415 LR: 0.000021 Epoch: [8][414/650] Data 0.270 (0.266) Elapsed 4m 44s (remain 2m 41s) Loss: 0.1584(0.2855) Grad: 5.0831 LR: 0.000021 Epoch: [8][432/650] Data 0.270 (0.266) Elapsed 4m 56s (remain 2m 28s) Loss: 0.5808(0.2872) Grad: 14.7858 LR: 0.000021 Epoch: [8][450/650] Data 0.264 (0.266) Elapsed 5m 9s (remain 2m 16s) Loss: 0.3834(0.2865) Grad: 10.4115 LR: 0.000021 Epoch: [8][468/650] Data 0.271 (0.266) Elapsed 5m 21s (remain 2m 4s) Loss: 0.1216(0.2855) Grad: 8.7635 LR: 0.000021 Epoch: [8][486/650] Data 0.263 (0.266) Elapsed 5m 33s (remain 1m 51s) Loss: 0.2855(0.2885) Grad: 8.0948 LR: 0.000021 Epoch: [8][504/650] Data 0.270 (0.266) Elapsed 5m 45s (remain 1m 39s) Loss: 0.1560(0.2872) Grad: 5.3591 LR: 0.000021 Epoch: [8][522/650] Data 0.272 (0.266) Elapsed 5m 58s (remain 1m 26s) Loss: 0.1662(0.2871) Grad: 7.6691 LR: 0.000021 Epoch: [8][540/650] Data 0.271 (0.266) Elapsed 6m 10s (remain 1m 14s) Loss: 0.4267(0.2865) Grad: 10.7915 LR: 0.000021 Epoch: [8][558/650] Data 0.271 (0.266) Elapsed 6m 22s (remain 1m 2s) Loss: 0.2560(0.2869) Grad: 7.7097 LR: 0.000021 Epoch: [8][576/650] Data 0.269 (0.266) Elapsed 6m 35s (remain 0m 49s) Loss: 0.1878(0.2853) Grad: 5.7360 LR: 0.000021 Epoch: [8][594/650] Data 0.263 (0.266) Elapsed 6m 
47s (remain 0m 37s) Loss: 0.5244(0.2854) Grad: 11.0146 LR: 0.000021 Epoch: [8][612/650] Data 0.271 (0.266) Elapsed 6m 59s (remain 0m 25s) Loss: 0.4782(0.2861) Grad: 10.7222 LR: 0.000021 Epoch: [8][630/650] Data 0.265 (0.266) Elapsed 7m 12s (remain 0m 13s) Loss: 0.7181(0.2857) Grad: 16.2429 LR: 0.000021 Epoch: [8][648/650] Data 0.273 (0.266) Elapsed 7m 24s (remain 0m 0s) Loss: 0.1003(0.2849) Grad: 3.6371 LR: 0.000021 Epoch: [8][649/650] Data 0.273 (0.266) Elapsed 7m 25s (remain 0m 0s) Loss: 0.2135(0.2848) Grad: 5.0881 LR: 0.000021 EVAL: [0/3883] Data 0.526 (0.526) Elapsed 0m 0s (remain 45m 30s) Loss: 0.1214(0.1214) EVAL: [18/3883] Data 0.001 (0.029) Elapsed 0m 3s (remain 13m 6s) Loss: 0.0971(0.0917) EVAL: [36/3883] Data 0.001 (0.015) Elapsed 0m 7s (remain 12m 9s) Loss: 0.0528(0.1009) EVAL: [54/3883] Data 0.001 (0.011) Elapsed 0m 10s (remain 11m 47s) Loss: 0.0162(0.0975) EVAL: [72/3883] Data 0.001 (0.008) Elapsed 0m 13s (remain 11m 34s) Loss: 0.2502(0.1117) EVAL: [90/3883] Data 0.001 (0.007) Elapsed 0m 16s (remain 11m 26s) Loss: 0.2226(0.1168) EVAL: [108/3883] Data 0.001 (0.006) Elapsed 0m 19s (remain 11m 19s) Loss: 0.1690(0.1192) EVAL: [126/3883] Data 0.001 (0.005) Elapsed 0m 22s (remain 11m 13s) Loss: 0.3011(0.1223) EVAL: [144/3883] Data 0.001 (0.005) Elapsed 0m 25s (remain 11m 8s) Loss: 0.1659(0.1185) EVAL: [162/3883] Data 0.001 (0.004) Elapsed 0m 29s (remain 11m 3s) Loss: 0.0256(0.1189) EVAL: [180/3883] Data 0.001 (0.004) Elapsed 0m 32s (remain 10m 58s) Loss: 0.0082(0.1160) EVAL: [198/3883] Data 0.001 (0.004) Elapsed 0m 35s (remain 10m 54s) Loss: 0.0314(0.1199) EVAL: [216/3883] Data 0.001 (0.003) Elapsed 0m 38s (remain 10m 50s) Loss: 0.0655(0.1207) EVAL: [234/3883] Data 0.001 (0.003) Elapsed 0m 41s (remain 10m 46s) Loss: 0.1108(0.1200) EVAL: [252/3883] Data 0.001 (0.003) Elapsed 0m 44s (remain 10m 42s) Loss: 0.0682(0.1171) EVAL: [270/3883] Data 0.001 (0.003) Elapsed 0m 47s (remain 10m 39s) Loss: 0.0660(0.1190) EVAL: [288/3883] Data 0.001 (0.003) Elapsed 0m 51s 
(remain 10m 35s) Loss: 0.3260(0.1208) EVAL: [306/3883] Data 0.001 (0.003) Elapsed 0m 54s (remain 10m 32s) Loss: 0.0473(0.1205) EVAL: [324/3883] Data 0.001 (0.003) Elapsed 0m 57s (remain 10m 28s) Loss: 0.0577(0.1201) EVAL: [342/3883] Data 0.001 (0.002) Elapsed 1m 0s (remain 10m 25s) Loss: 0.0083(0.1222) EVAL: [360/3883] Data 0.001 (0.002) Elapsed 1m 3s (remain 10m 21s) Loss: 0.0975(0.1220) EVAL: [378/3883] Data 0.001 (0.002) Elapsed 1m 6s (remain 10m 18s) Loss: 0.0593(0.1220) EVAL: [396/3883] Data 0.001 (0.002) Elapsed 1m 10s (remain 10m 14s) Loss: 0.2474(0.1217) EVAL: [414/3883] Data 0.001 (0.002) Elapsed 1m 13s (remain 10m 11s) Loss: 0.1649(0.1225) EVAL: [432/3883] Data 0.001 (0.002) Elapsed 1m 16s (remain 10m 8s) Loss: 0.0641(0.1224) EVAL: [450/3883] Data 0.001 (0.002) Elapsed 1m 19s (remain 10m 4s) Loss: 0.1556(0.1231) EVAL: [468/3883] Data 0.001 (0.002) Elapsed 1m 22s (remain 10m 1s) Loss: 0.0385(0.1244) EVAL: [486/3883] Data 0.001 (0.002) Elapsed 1m 25s (remain 9m 58s) Loss: 0.2705(0.1240) EVAL: [504/3883] Data 0.001 (0.002) Elapsed 1m 28s (remain 9m 54s) Loss: 0.0547(0.1249) EVAL: [522/3883] Data 0.001 (0.002) Elapsed 1m 32s (remain 9m 51s) Loss: 0.2338(0.1243) EVAL: [540/3883] Data 0.001 (0.002) Elapsed 1m 35s (remain 9m 48s) Loss: 0.0475(0.1233) EVAL: [558/3883] Data 0.001 (0.002) Elapsed 1m 38s (remain 9m 44s) Loss: 0.2796(0.1237) EVAL: [576/3883] Data 0.001 (0.002) Elapsed 1m 41s (remain 9m 41s) Loss: 0.0206(0.1229) EVAL: [594/3883] Data 0.001 (0.002) Elapsed 1m 44s (remain 9m 38s) Loss: 0.1175(0.1222) EVAL: [612/3883] Data 0.001 (0.002) Elapsed 1m 47s (remain 9m 35s) Loss: 0.0703(0.1217) EVAL: [630/3883] Data 0.002 (0.002) Elapsed 1m 50s (remain 9m 31s) Loss: 0.0438(0.1214) EVAL: [648/3883] Data 0.001 (0.002) Elapsed 1m 54s (remain 9m 28s) Loss: 0.0498(0.1215) EVAL: [666/3883] Data 0.001 (0.002) Elapsed 1m 57s (remain 9m 25s) Loss: 0.1721(0.1215) EVAL: [684/3883] Data 0.001 (0.002) Elapsed 2m 0s (remain 9m 22s) Loss: 0.2022(0.1217) EVAL: [702/3883] Data 
0.001 (0.002) Elapsed 2m 3s (remain 9m 18s) Loss: 0.1204(0.1203) EVAL: [720/3883] Data 0.001 (0.002) Elapsed 2m 6s (remain 9m 15s) Loss: 0.1220(0.1197) EVAL: [738/3883] Data 0.001 (0.002) Elapsed 2m 9s (remain 9m 12s) Loss: 0.0128(0.1200) EVAL: [756/3883] Data 0.001 (0.002) Elapsed 2m 13s (remain 9m 9s) Loss: 0.1355(0.1202) EVAL: [774/3883] Data 0.001 (0.002) Elapsed 2m 16s (remain 9m 6s) Loss: 0.0416(0.1198) EVAL: [792/3883] Data 0.001 (0.002) Elapsed 2m 19s (remain 9m 2s) Loss: 0.1321(0.1203) EVAL: [810/3883] Data 0.001 (0.002) Elapsed 2m 22s (remain 8m 59s) Loss: 0.0174(0.1203) EVAL: [828/3883] Data 0.001 (0.002) Elapsed 2m 25s (remain 8m 56s) Loss: 0.0182(0.1206) EVAL: [846/3883] Data 0.001 (0.002) Elapsed 2m 28s (remain 8m 53s) Loss: 0.0520(0.1204) EVAL: [864/3883] Data 0.001 (0.002) Elapsed 2m 31s (remain 8m 50s) Loss: 0.2389(0.1212) EVAL: [882/3883] Data 0.001 (0.002) Elapsed 2m 35s (remain 8m 46s) Loss: 0.0057(0.1210) EVAL: [900/3883] Data 0.001 (0.002) Elapsed 2m 38s (remain 8m 43s) Loss: 0.2384(0.1206) EVAL: [918/3883] Data 0.001 (0.001) Elapsed 2m 41s (remain 8m 40s) Loss: 0.1497(0.1207) EVAL: [936/3883] Data 0.001 (0.001) Elapsed 2m 44s (remain 8m 37s) Loss: 0.2207(0.1208) EVAL: [954/3883] Data 0.001 (0.001) Elapsed 2m 47s (remain 8m 34s) Loss: 0.0393(0.1204) EVAL: [972/3883] Data 0.001 (0.001) Elapsed 2m 50s (remain 8m 30s) Loss: 0.0443(0.1205) EVAL: [990/3883] Data 0.001 (0.001) Elapsed 2m 53s (remain 8m 27s) Loss: 0.0967(0.1200) EVAL: [1008/3883] Data 0.001 (0.001) Elapsed 2m 57s (remain 8m 24s) Loss: 0.3404(0.1207) EVAL: [1026/3883] Data 0.001 (0.001) Elapsed 3m 0s (remain 8m 21s) Loss: 0.0217(0.1201) EVAL: [1044/3883] Data 0.001 (0.001) Elapsed 3m 3s (remain 8m 18s) Loss: 0.0156(0.1199) EVAL: [1062/3883] Data 0.001 (0.001) Elapsed 3m 6s (remain 8m 14s) Loss: 0.0889(0.1198) EVAL: [1080/3883] Data 0.001 (0.001) Elapsed 3m 9s (remain 8m 11s) Loss: 0.0585(0.1197) EVAL: [1098/3883] Data 0.001 (0.001) Elapsed 3m 12s (remain 8m 8s) Loss: 0.1269(0.1193) 
EVAL: [1116/3883] Data 0.001 (0.001) Elapsed 3m 16s (remain 8m 5s) Loss: 0.0762(0.1195) EVAL: [1134/3883] Data 0.001 (0.001) Elapsed 3m 19s (remain 8m 2s) Loss: 0.0128(0.1193) EVAL: [1152/3883] Data 0.001 (0.001) Elapsed 3m 22s (remain 7m 59s) Loss: 0.1500(0.1191) EVAL: [1170/3883] Data 0.001 (0.001) Elapsed 3m 25s (remain 7m 55s) Loss: 0.1003(0.1186) EVAL: [1188/3883] Data 0.001 (0.001) Elapsed 3m 28s (remain 7m 52s) Loss: 0.1230(0.1185) EVAL: [1206/3883] Data 0.001 (0.001) Elapsed 3m 31s (remain 7m 49s) Loss: 0.0177(0.1185) EVAL: [1224/3883] Data 0.001 (0.001) Elapsed 3m 34s (remain 7m 46s) Loss: 0.1887(0.1185) EVAL: [1242/3883] Data 0.001 (0.001) Elapsed 3m 38s (remain 7m 43s) Loss: 0.1118(0.1186) EVAL: [1260/3883] Data 0.001 (0.001) Elapsed 3m 41s (remain 7m 39s) Loss: 0.4290(0.1188) EVAL: [1278/3883] Data 0.001 (0.001) Elapsed 3m 44s (remain 7m 36s) Loss: 0.2272(0.1192) EVAL: [1296/3883] Data 0.001 (0.001) Elapsed 3m 47s (remain 7m 33s) Loss: 0.0121(0.1193) EVAL: [1314/3883] Data 0.001 (0.001) Elapsed 3m 50s (remain 7m 30s) Loss: 0.0347(0.1198) EVAL: [1332/3883] Data 0.001 (0.001) Elapsed 3m 53s (remain 7m 27s) Loss: 0.1512(0.1196) EVAL: [1350/3883] Data 0.002 (0.001) Elapsed 3m 56s (remain 7m 24s) Loss: 0.0163(0.1196) EVAL: [1368/3883] Data 0.001 (0.001) Elapsed 4m 0s (remain 7m 20s) Loss: 0.2239(0.1204) EVAL: [1386/3883] Data 0.001 (0.001) Elapsed 4m 3s (remain 7m 17s) Loss: 0.0635(0.1204) EVAL: [1404/3883] Data 0.001 (0.001) Elapsed 4m 6s (remain 7m 14s) Loss: 0.0670(0.1206) EVAL: [1422/3883] Data 0.001 (0.001) Elapsed 4m 9s (remain 7m 11s) Loss: 0.1122(0.1206) EVAL: [1440/3883] Data 0.001 (0.001) Elapsed 4m 12s (remain 7m 8s) Loss: 0.1574(0.1201) EVAL: [1458/3883] Data 0.001 (0.001) Elapsed 4m 15s (remain 7m 5s) Loss: 0.0500(0.1202) EVAL: [1476/3883] Data 0.001 (0.001) Elapsed 4m 19s (remain 7m 1s) Loss: 0.0802(0.1199) EVAL: [1494/3883] Data 0.001 (0.001) Elapsed 4m 22s (remain 6m 58s) Loss: 0.2303(0.1197) EVAL: [1512/3883] Data 0.001 (0.001) Elapsed 4m 
25s (remain 6m 55s) Loss: 0.3411(0.1196) EVAL: [1530/3883] Data 0.001 (0.001) Elapsed 4m 28s (remain 6m 52s) Loss: 0.2978(0.1198) EVAL: [1548/3883] Data 0.001 (0.001) Elapsed 4m 31s (remain 6m 49s) Loss: 0.0970(0.1196) EVAL: [1566/3883] Data 0.001 (0.001) Elapsed 4m 34s (remain 6m 46s) Loss: 0.2444(0.1194) EVAL: [1584/3883] Data 0.001 (0.001) Elapsed 4m 37s (remain 6m 42s) Loss: 0.0083(0.1194) EVAL: [1602/3883] Data 0.001 (0.001) Elapsed 4m 41s (remain 6m 39s) Loss: 0.2415(0.1198) EVAL: [1620/3883] Data 0.001 (0.001) Elapsed 4m 44s (remain 6m 36s) Loss: 0.1871(0.1203) EVAL: [1638/3883] Data 0.001 (0.001) Elapsed 4m 47s (remain 6m 33s) Loss: 0.1731(0.1206) EVAL: [1656/3883] Data 0.001 (0.001) Elapsed 4m 50s (remain 6m 30s) Loss: 0.1200(0.1206) EVAL: [1674/3883] Data 0.001 (0.001) Elapsed 4m 53s (remain 6m 27s) Loss: 0.2178(0.1206) EVAL: [1692/3883] Data 0.001 (0.001) Elapsed 4m 56s (remain 6m 23s) Loss: 0.0259(0.1204) EVAL: [1710/3883] Data 0.001 (0.001) Elapsed 4m 59s (remain 6m 20s) Loss: 0.1079(0.1208) EVAL: [1728/3883] Data 0.001 (0.001) Elapsed 5m 3s (remain 6m 17s) Loss: 0.0366(0.1208) EVAL: [1746/3883] Data 0.001 (0.001) Elapsed 5m 6s (remain 6m 14s) Loss: 0.1137(0.1208) EVAL: [1764/3883] Data 0.001 (0.001) Elapsed 5m 9s (remain 6m 11s) Loss: 0.1400(0.1211) EVAL: [1782/3883] Data 0.001 (0.001) Elapsed 5m 12s (remain 6m 8s) Loss: 0.3607(0.1212) EVAL: [1800/3883] Data 0.001 (0.001) Elapsed 5m 15s (remain 6m 4s) Loss: 0.2070(0.1213) EVAL: [1818/3883] Data 0.002 (0.001) Elapsed 5m 18s (remain 6m 1s) Loss: 0.0129(0.1213) EVAL: [1836/3883] Data 0.002 (0.001) Elapsed 5m 22s (remain 5m 58s) Loss: 0.0369(0.1215) EVAL: [1854/3883] Data 0.001 (0.001) Elapsed 5m 25s (remain 5m 55s) Loss: 0.0064(0.1216) EVAL: [1872/3883] Data 0.001 (0.001) Elapsed 5m 28s (remain 5m 52s) Loss: 0.2279(0.1216) EVAL: [1890/3883] Data 0.001 (0.001) Elapsed 5m 31s (remain 5m 49s) Loss: 0.0164(0.1215) EVAL: [1908/3883] Data 0.001 (0.001) Elapsed 5m 34s (remain 5m 45s) Loss: 0.1070(0.1215) EVAL: 
[1926/3883] Data 0.001 (0.001) Elapsed 5m 37s (remain 5m 42s) Loss: 0.0104(0.1214) EVAL: [1944/3883] Data 0.001 (0.001) Elapsed 5m 40s (remain 5m 39s) Loss: 0.0096(0.1216) EVAL: [1962/3883] Data 0.001 (0.001) Elapsed 5m 44s (remain 5m 36s) Loss: 0.1740(0.1219) EVAL: [1980/3883] Data 0.002 (0.001) Elapsed 5m 47s (remain 5m 33s) Loss: 0.1918(0.1220) EVAL: [1998/3883] Data 0.001 (0.001) Elapsed 5m 50s (remain 5m 30s) Loss: 0.1128(0.1220) EVAL: [2016/3883] Data 0.002 (0.001) Elapsed 5m 53s (remain 5m 27s) Loss: 0.0116(0.1223) EVAL: [2034/3883] Data 0.001 (0.001) Elapsed 5m 56s (remain 5m 23s) Loss: 0.0219(0.1222) EVAL: [2052/3883] Data 0.001 (0.001) Elapsed 5m 59s (remain 5m 20s) Loss: 0.3381(0.1221) EVAL: [2070/3883] Data 0.001 (0.001) Elapsed 6m 2s (remain 5m 17s) Loss: 0.2170(0.1224) EVAL: [2088/3883] Data 0.001 (0.001) Elapsed 6m 6s (remain 5m 14s) Loss: 0.0133(0.1221) EVAL: [2106/3883] Data 0.001 (0.001) Elapsed 6m 9s (remain 5m 11s) Loss: 0.0963(0.1221) EVAL: [2124/3883] Data 0.001 (0.001) Elapsed 6m 12s (remain 5m 8s) Loss: 0.0834(0.1218) EVAL: [2142/3883] Data 0.001 (0.001) Elapsed 6m 15s (remain 5m 4s) Loss: 0.0632(0.1217) EVAL: [2160/3883] Data 0.001 (0.001) Elapsed 6m 18s (remain 5m 1s) Loss: 0.1359(0.1214) EVAL: [2178/3883] Data 0.001 (0.001) Elapsed 6m 21s (remain 4m 58s) Loss: 0.0076(0.1212) EVAL: [2196/3883] Data 0.001 (0.001) Elapsed 6m 25s (remain 4m 55s) Loss: 0.0729(0.1211) EVAL: [2214/3883] Data 0.001 (0.001) Elapsed 6m 28s (remain 4m 52s) Loss: 0.0841(0.1210) EVAL: [2232/3883] Data 0.001 (0.001) Elapsed 6m 31s (remain 4m 49s) Loss: 0.2169(0.1212) EVAL: [2250/3883] Data 0.001 (0.001) Elapsed 6m 34s (remain 4m 45s) Loss: 0.5387(0.1212) EVAL: [2268/3883] Data 0.001 (0.001) Elapsed 6m 37s (remain 4m 42s) Loss: 0.0686(0.1214) EVAL: [2286/3883] Data 0.001 (0.001) Elapsed 6m 40s (remain 4m 39s) Loss: 0.2542(0.1215) EVAL: [2304/3883] Data 0.001 (0.001) Elapsed 6m 43s (remain 4m 36s) Loss: 0.1923(0.1213) EVAL: [2322/3883] Data 0.001 (0.001) Elapsed 6m 47s 
(remain 4m 33s) Loss: 0.0269(0.1212) EVAL: [2340/3883] Data 0.001 (0.001) Elapsed 6m 50s (remain 4m 30s) Loss: 0.0904(0.1211) EVAL: [2358/3883] Data 0.001 (0.001) Elapsed 6m 53s (remain 4m 27s) Loss: 0.2938(0.1210) EVAL: [2376/3883] Data 0.001 (0.001) Elapsed 6m 56s (remain 4m 23s) Loss: 0.1867(0.1212) EVAL: [2394/3883] Data 0.001 (0.001) Elapsed 6m 59s (remain 4m 20s) Loss: 0.3843(0.1211) EVAL: [2412/3883] Data 0.001 (0.001) Elapsed 7m 2s (remain 4m 17s) Loss: 0.1632(0.1211) EVAL: [2430/3883] Data 0.001 (0.001) Elapsed 7m 5s (remain 4m 14s) Loss: 0.0386(0.1210) EVAL: [2448/3883] Data 0.001 (0.001) Elapsed 7m 9s (remain 4m 11s) Loss: 0.0925(0.1207) EVAL: [2466/3883] Data 0.001 (0.001) Elapsed 7m 12s (remain 4m 8s) Loss: 0.0870(0.1206) EVAL: [2484/3883] Data 0.001 (0.001) Elapsed 7m 15s (remain 4m 4s) Loss: 0.2603(0.1207) EVAL: [2502/3883] Data 0.001 (0.001) Elapsed 7m 18s (remain 4m 1s) Loss: 0.0400(0.1209) EVAL: [2520/3883] Data 0.002 (0.001) Elapsed 7m 21s (remain 3m 58s) Loss: 0.0408(0.1206) EVAL: [2538/3883] Data 0.001 (0.001) Elapsed 7m 24s (remain 3m 55s) Loss: 0.1029(0.1210) EVAL: [2556/3883] Data 0.001 (0.001) Elapsed 7m 28s (remain 3m 52s) Loss: 0.4657(0.1209) EVAL: [2574/3883] Data 0.001 (0.001) Elapsed 7m 31s (remain 3m 49s) Loss: 0.5159(0.1210) EVAL: [2592/3883] Data 0.001 (0.001) Elapsed 7m 34s (remain 3m 46s) Loss: 0.0422(0.1211) EVAL: [2610/3883] Data 0.001 (0.001) Elapsed 7m 37s (remain 3m 42s) Loss: 0.1755(0.1213) EVAL: [2628/3883] Data 0.001 (0.001) Elapsed 7m 40s (remain 3m 39s) Loss: 0.0424(0.1213) EVAL: [2646/3883] Data 0.001 (0.001) Elapsed 7m 43s (remain 3m 36s) Loss: 0.1898(0.1213) EVAL: [2664/3883] Data 0.001 (0.001) Elapsed 7m 46s (remain 3m 33s) Loss: 0.0613(0.1211) EVAL: [2682/3883] Data 0.001 (0.001) Elapsed 7m 50s (remain 3m 30s) Loss: 0.1343(0.1211) EVAL: [2700/3883] Data 0.001 (0.001) Elapsed 7m 53s (remain 3m 27s) Loss: 0.0783(0.1211) EVAL: [2718/3883] Data 0.001 (0.001) Elapsed 7m 56s (remain 3m 23s) Loss: 0.1798(0.1212) EVAL: 
[2736/3883] Data 0.001 (0.001) Elapsed 7m 59s (remain 3m 20s) Loss: 0.0410(0.1213) EVAL: [2754/3883] Data 0.001 (0.001) Elapsed 8m 2s (remain 3m 17s) Loss: 0.0875(0.1217) EVAL: [2772/3883] Data 0.001 (0.001) Elapsed 8m 5s (remain 3m 14s) Loss: 0.1354(0.1218) EVAL: [2790/3883] Data 0.001 (0.001) Elapsed 8m 8s (remain 3m 11s) Loss: 0.0179(0.1217) EVAL: [2808/3883] Data 0.001 (0.001) Elapsed 8m 12s (remain 3m 8s) Loss: 0.1994(0.1217) EVAL: [2826/3883] Data 0.001 (0.001) Elapsed 8m 15s (remain 3m 5s) Loss: 0.1359(0.1216) EVAL: [2844/3883] Data 0.001 (0.001) Elapsed 8m 18s (remain 3m 1s) Loss: 0.1025(0.1216) EVAL: [2862/3883] Data 0.001 (0.001) Elapsed 8m 21s (remain 2m 58s) Loss: 0.3260(0.1215) EVAL: [2880/3883] Data 0.001 (0.001) Elapsed 8m 24s (remain 2m 55s) Loss: 0.0124(0.1213) EVAL: [2898/3883] Data 0.001 (0.001) Elapsed 8m 27s (remain 2m 52s) Loss: 0.2616(0.1214) EVAL: [2916/3883] Data 0.001 (0.001) Elapsed 8m 31s (remain 2m 49s) Loss: 0.1344(0.1213) EVAL: [2934/3883] Data 0.001 (0.001) Elapsed 8m 34s (remain 2m 46s) Loss: 0.3126(0.1212) EVAL: [2952/3883] Data 0.001 (0.001) Elapsed 8m 37s (remain 2m 42s) Loss: 0.0097(0.1213) EVAL: [2970/3883] Data 0.001 (0.001) Elapsed 8m 40s (remain 2m 39s) Loss: 0.0546(0.1213) EVAL: [2988/3883] Data 0.001 (0.001) Elapsed 8m 43s (remain 2m 36s) Loss: 0.0642(0.1214) EVAL: [3006/3883] Data 0.001 (0.001) Elapsed 8m 46s (remain 2m 33s) Loss: 0.0730(0.1213) EVAL: [3024/3883] Data 0.003 (0.001) Elapsed 8m 49s (remain 2m 30s) Loss: 0.3613(0.1213) EVAL: [3042/3883] Data 0.001 (0.001) Elapsed 8m 53s (remain 2m 27s) Loss: 0.0355(0.1211) EVAL: [3060/3883] Data 0.001 (0.001) Elapsed 8m 56s (remain 2m 23s) Loss: 0.1339(0.1212) EVAL: [3078/3883] Data 0.001 (0.001) Elapsed 8m 59s (remain 2m 20s) Loss: 0.2039(0.1211) EVAL: [3096/3883] Data 0.001 (0.001) Elapsed 9m 2s (remain 2m 17s) Loss: 0.5491(0.1212) EVAL: [3114/3883] Data 0.001 (0.001) Elapsed 9m 5s (remain 2m 14s) Loss: 0.1301(0.1212) EVAL: [3132/3883] Data 0.001 (0.001) Elapsed 9m 8s 
(remain 2m 11s) Loss: 0.0671(0.1211) EVAL: [3150/3883] Data 0.001 (0.001) Elapsed 9m 11s (remain 2m 8s) Loss: 0.0762(0.1210) EVAL: [3168/3883] Data 0.001 (0.001) Elapsed 9m 15s (remain 2m 5s) Loss: 0.2602(0.1209) EVAL: [3186/3883] Data 0.001 (0.001) Elapsed 9m 18s (remain 2m 1s) Loss: 0.0073(0.1206) EVAL: [3204/3883] Data 0.001 (0.001) Elapsed 9m 21s (remain 1m 58s) Loss: 0.0414(0.1206) EVAL: [3222/3883] Data 0.001 (0.001) Elapsed 9m 24s (remain 1m 55s) Loss: 0.2588(0.1206) EVAL: [3240/3883] Data 0.001 (0.001) Elapsed 9m 27s (remain 1m 52s) Loss: 0.4346(0.1207) EVAL: [3258/3883] Data 0.001 (0.001) Elapsed 9m 30s (remain 1m 49s) Loss: 0.0139(0.1206) EVAL: [3276/3883] Data 0.001 (0.001) Elapsed 9m 34s (remain 1m 46s) Loss: 0.0763(0.1205) EVAL: [3294/3883] Data 0.001 (0.001) Elapsed 9m 37s (remain 1m 42s) Loss: 0.1680(0.1206) EVAL: [3312/3883] Data 0.001 (0.001) Elapsed 9m 40s (remain 1m 39s) Loss: 0.0047(0.1206) EVAL: [3330/3883] Data 0.001 (0.001) Elapsed 9m 43s (remain 1m 36s) Loss: 0.2259(0.1205) EVAL: [3348/3883] Data 0.001 (0.001) Elapsed 9m 46s (remain 1m 33s) Loss: 0.0737(0.1205) EVAL: [3366/3883] Data 0.001 (0.001) Elapsed 9m 49s (remain 1m 30s) Loss: 0.0149(0.1204) EVAL: [3384/3883] Data 0.001 (0.001) Elapsed 9m 52s (remain 1m 27s) Loss: 0.1307(0.1206) EVAL: [3402/3883] Data 0.001 (0.001) Elapsed 9m 56s (remain 1m 24s) Loss: 0.1870(0.1207) EVAL: [3420/3883] Data 0.001 (0.001) Elapsed 9m 59s (remain 1m 20s) Loss: 0.1482(0.1206) EVAL: [3438/3883] Data 0.001 (0.001) Elapsed 10m 2s (remain 1m 17s) Loss: 0.2171(0.1208) EVAL: [3456/3883] Data 0.001 (0.001) Elapsed 10m 5s (remain 1m 14s) Loss: 0.2650(0.1209) EVAL: [3474/3883] Data 0.001 (0.001) Elapsed 10m 8s (remain 1m 11s) Loss: 0.0282(0.1210) EVAL: [3492/3883] Data 0.001 (0.001) Elapsed 10m 11s (remain 1m 8s) Loss: 0.1525(0.1211) EVAL: [3510/3883] Data 0.001 (0.001) Elapsed 10m 14s (remain 1m 5s) Loss: 0.0677(0.1210) EVAL: [3528/3883] Data 0.001 (0.001) Elapsed 10m 18s (remain 1m 2s) Loss: 0.5701(0.1212) EVAL: 
[3546/3883] Data 0.001 (0.001) Elapsed 10m 21s (remain 0m 58s) Loss: 0.2618(0.1212) EVAL: [3564/3883] Data 0.001 (0.001) Elapsed 10m 24s (remain 0m 55s) Loss: 0.1475(0.1210) EVAL: [3582/3883] Data 0.001 (0.001) Elapsed 10m 27s (remain 0m 52s) Loss: 0.0878(0.1210) EVAL: [3600/3883] Data 0.001 (0.001) Elapsed 10m 30s (remain 0m 49s) Loss: 0.0617(0.1209) EVAL: [3618/3883] Data 0.001 (0.001) Elapsed 10m 33s (remain 0m 46s) Loss: 0.1590(0.1210) EVAL: [3636/3883] Data 0.001 (0.001) Elapsed 10m 37s (remain 0m 43s) Loss: 0.0380(0.1210) EVAL: [3654/3883] Data 0.001 (0.001) Elapsed 10m 40s (remain 0m 39s) Loss: 0.0724(0.1209) EVAL: [3672/3883] Data 0.001 (0.001) Elapsed 10m 43s (remain 0m 36s) Loss: 0.4441(0.1211) EVAL: [3690/3883] Data 0.001 (0.001) Elapsed 10m 46s (remain 0m 33s) Loss: 0.0223(0.1212) EVAL: [3708/3883] Data 0.001 (0.001) Elapsed 10m 49s (remain 0m 30s) Loss: 0.0575(0.1214) EVAL: [3726/3883] Data 0.001 (0.001) Elapsed 10m 52s (remain 0m 27s) Loss: 0.1847(0.1214) EVAL: [3744/3883] Data 0.001 (0.001) Elapsed 10m 55s (remain 0m 24s) Loss: 0.1493(0.1214) EVAL: [3762/3883] Data 0.001 (0.001) Elapsed 10m 59s (remain 0m 21s) Loss: 0.0287(0.1213) EVAL: [3780/3883] Data 0.001 (0.001) Elapsed 11m 2s (remain 0m 17s) Loss: 0.0065(0.1212) EVAL: [3798/3883] Data 0.001 (0.001) Elapsed 11m 5s (remain 0m 14s) Loss: 0.0079(0.1213) EVAL: [3816/3883] Data 0.001 (0.001) Elapsed 11m 8s (remain 0m 11s) Loss: 0.0170(0.1213) EVAL: [3834/3883] Data 0.001 (0.001) Elapsed 11m 11s (remain 0m 8s) Loss: 0.0360(0.1214) EVAL: [3852/3883] Data 0.001 (0.001) Elapsed 11m 14s (remain 0m 5s) Loss: 0.2034(0.1214) EVAL: [3870/3883] Data 0.001 (0.001) Elapsed 11m 17s (remain 0m 2s) Loss: 0.0397(0.1212)
Epoch 8 - avg_train_loss: 0.2848 avg_val_loss: 0.1212 time: 1125s Epoch 8 - AUC: 0.9662144744473803 - pAUC: 0.1779875835105583 Epoch 8 - Save Best Score: 0.1780 Model
EVAL: [3882/3883] Data 0.001 (0.001) Elapsed 11m 19s (remain 0m 0s) Loss: 0.2291(0.1212) Epoch: [9][0/650] Data 0.838 (0.838) Elapsed 0m 1s (remain 13m 56s) Loss: 0.3655(0.3655) Grad: 8.7047 LR: 0.000010 Epoch: [9][18/650] Data 0.264 (0.292) Elapsed 0m 13s (remain 7m 31s) Loss: 0.1435(0.2465) Grad: 3.1952 LR: 0.000010 Epoch: [9][36/650] Data 0.268 (0.281) Elapsed 0m 25s (remain 7m 8s) Loss: 0.5164(0.2674) Grad: 26.3932 LR: 0.000010 Epoch: [9][54/650] Data 0.270 (0.276) Elapsed 0m 38s (remain 6m 53s) Loss: 0.3373(0.2798) Grad: 9.3891 LR: 0.000010 Epoch: [9][72/650] Data 0.264 (0.273) Elapsed 0m 50s (remain 6m 39s) Loss: 0.1889(0.2873) Grad: 5.5536 LR: 0.000010 Epoch: [9][90/650] Data 0.266 (0.272) Elapsed 1m 2s (remain 6m 25s) Loss: 0.2429(0.2793) Grad: 8.8979 LR: 0.000010 Epoch: [9][108/650] Data 0.270 (0.271) Elapsed 1m 15s (remain 6m 12s) Loss: 0.2673(0.2798) Grad: 7.3901 LR: 0.000010 Epoch: [9][126/650] Data 0.269 (0.271) Elapsed 1m 27s (remain 6m 0s) Loss: 0.1782(0.2701) Grad: 5.3718 LR: 0.000010 Epoch: [9][144/650] Data 0.267 (0.270) Elapsed 1m 39s (remain 5m 47s) Loss: 0.2164(0.2670) Grad: 7.8991 LR: 0.000010 Epoch: [9][162/650] Data 0.268 (0.269) Elapsed 1m 52s (remain 5m 34s) Loss: 0.1930(0.2619) Grad: 4.7832 LR: 0.000010 Epoch: [9][180/650] Data 0.269 (0.269) Elapsed 2m 4s (remain 5m 22s) Loss: 0.2784(0.2681) Grad: 6.4986 LR: 0.000010 Epoch: [9][198/650] Data 0.259 (0.269) Elapsed 2m 16s (remain 5m 9s) Loss: 0.1528(0.2678) Grad: 8.0429 LR: 0.000010 Epoch: [9][216/650] Data 0.265 (0.269) Elapsed 2m 28s (remain 4m 57s) Loss: 0.2480(0.2691) Grad: 7.6756 LR: 0.000010 Epoch: [9][234/650] Data 0.264 (0.269) Elapsed 2m 41s (remain 4m 44s) Loss: 0.2100(0.2726) Grad: 5.1607 LR: 0.000010 Epoch: [9][252/650] Data 0.269 (0.268) Elapsed 2m 53s (remain 4m 32s) Loss: 0.3568(0.2699) Grad: 13.9682 LR: 0.000010 Epoch: [9][270/650] Data 0.271 (0.268) Elapsed 3m 5s (remain 4m 19s) Loss: 0.1150(0.2669) Grad: 4.2333 LR: 0.000010 Epoch: [9][288/650] Data 0.266 (0.268) Elapsed 3m 
18s (remain 4m 7s) Loss: 0.1834(0.2663) Grad: 5.0920 LR: 0.000010 Epoch: [9][306/650] Data 0.261 (0.268) Elapsed 3m 30s (remain 3m 55s) Loss: 0.5434(0.2685) Grad: 15.8955 LR: 0.000010 Epoch: [9][324/650] Data 0.264 (0.268) Elapsed 3m 42s (remain 3m 42s) Loss: 0.1137(0.2680) Grad: 5.6304 LR: 0.000010 Epoch: [9][342/650] Data 0.262 (0.268) Elapsed 3m 55s (remain 3m 30s) Loss: 0.2298(0.2652) Grad: 6.9337 LR: 0.000010 Epoch: [9][360/650] Data 0.271 (0.268) Elapsed 4m 7s (remain 3m 18s) Loss: 0.1744(0.2623) Grad: 4.6304 LR: 0.000010 Epoch: [9][378/650] Data 0.258 (0.268) Elapsed 4m 19s (remain 3m 5s) Loss: 0.3014(0.2604) Grad: 9.6371 LR: 0.000010 Epoch: [9][396/650] Data 0.262 (0.268) Elapsed 4m 32s (remain 2m 53s) Loss: 0.0740(0.2579) Grad: 3.9812 LR: 0.000010 Epoch: [9][414/650] Data 0.271 (0.268) Elapsed 4m 44s (remain 2m 41s) Loss: 0.5871(0.2571) Grad: 13.6334 LR: 0.000010 Epoch: [9][432/650] Data 0.270 (0.267) Elapsed 4m 56s (remain 2m 28s) Loss: 0.2971(0.2568) Grad: 13.8589 LR: 0.000010 Epoch: [9][450/650] Data 0.263 (0.267) Elapsed 5m 8s (remain 2m 16s) Loss: 0.3135(0.2602) Grad: 11.2665 LR: 0.000010 Epoch: [9][468/650] Data 0.266 (0.267) Elapsed 5m 21s (remain 2m 3s) Loss: 0.1600(0.2603) Grad: 5.3893 LR: 0.000010 Epoch: [9][486/650] Data 0.270 (0.267) Elapsed 5m 33s (remain 1m 51s) Loss: 0.2368(0.2602) Grad: 6.0548 LR: 0.000010 Epoch: [9][504/650] Data 0.272 (0.267) Elapsed 5m 45s (remain 1m 39s) Loss: 0.4316(0.2605) Grad: 11.6346 LR: 0.000010 Epoch: [9][522/650] Data 0.268 (0.267) Elapsed 5m 58s (remain 1m 26s) Loss: 0.0571(0.2610) Grad: 2.2202 LR: 0.000010 Epoch: [9][540/650] Data 0.271 (0.267) Elapsed 6m 10s (remain 1m 14s) Loss: 0.3164(0.2617) Grad: 6.8745 LR: 0.000010 Epoch: [9][558/650] Data 0.266 (0.267) Elapsed 6m 22s (remain 1m 2s) Loss: 0.1056(0.2611) Grad: 3.3047 LR: 0.000010 Epoch: [9][576/650] Data 0.247 (0.267) Elapsed 6m 35s (remain 0m 49s) Loss: 0.1810(0.2611) Grad: 5.5736 LR: 0.000010 Epoch: [9][594/650] Data 0.268 (0.267) Elapsed 6m 47s (remain 
0m 37s) Loss: 0.3238(0.2622) Grad: 7.9921 LR: 0.000010 Epoch: [9][612/650] Data 0.271 (0.267) Elapsed 6m 59s (remain 0m 25s) Loss: 0.1670(0.2614) Grad: 9.7635 LR: 0.000010 Epoch: [9][630/650] Data 0.254 (0.267) Elapsed 7m 12s (remain 0m 13s) Loss: 0.5266(0.2618) Grad: 11.0858 LR: 0.000010 Epoch: [9][648/650] Data 0.272 (0.267) Elapsed 7m 24s (remain 0m 0s) Loss: 0.1832(0.2627) Grad: 5.1640 LR: 0.000010 Epoch: [9][649/650] Data 0.273 (0.267) Elapsed 7m 25s (remain 0m 0s) Loss: 0.3681(0.2629) Grad: 8.9645 LR: 0.000010 EVAL: [0/3883] Data 0.480 (0.480) Elapsed 0m 0s (remain 42m 31s) Loss: 0.1005(0.1005) EVAL: [18/3883] Data 0.001 (0.027) Elapsed 0m 3s (remain 12m 57s) Loss: 0.0831(0.0763) EVAL: [36/3883] Data 0.001 (0.014) Elapsed 0m 6s (remain 12m 4s) Loss: 0.0466(0.0909) EVAL: [54/3883] Data 0.001 (0.010) Elapsed 0m 10s (remain 11m 44s) Loss: 0.0182(0.0892) EVAL: [72/3883] Data 0.001 (0.008) Elapsed 0m 13s (remain 11m 32s) Loss: 0.2322(0.1021) EVAL: [90/3883] Data 0.001 (0.006) Elapsed 0m 16s (remain 11m 24s) Loss: 0.2105(0.1064) EVAL: [108/3883] Data 0.001 (0.005) Elapsed 0m 19s (remain 11m 17s) Loss: 0.1491(0.1084) EVAL: [126/3883] Data 0.001 (0.005) Elapsed 0m 22s (remain 11m 11s) Loss: 0.3042(0.1116) EVAL: [144/3883] Data 0.001 (0.004) Elapsed 0m 25s (remain 11m 6s) Loss: 0.1500(0.1069) EVAL: [162/3883] Data 0.001 (0.004) Elapsed 0m 29s (remain 11m 2s) Loss: 0.0361(0.1076) EVAL: [180/3883] Data 0.001 (0.004) Elapsed 0m 32s (remain 10m 57s) Loss: 0.0075(0.1049) EVAL: [198/3883] Data 0.001 (0.003) Elapsed 0m 35s (remain 10m 53s) Loss: 0.0272(0.1086) EVAL: [216/3883] Data 0.001 (0.003) Elapsed 0m 38s (remain 10m 49s) Loss: 0.0299(0.1089) EVAL: [234/3883] Data 0.002 (0.003) Elapsed 0m 41s (remain 10m 46s) Loss: 0.1235(0.1084) EVAL: [252/3883] Data 0.001 (0.003) Elapsed 0m 44s (remain 10m 42s) Loss: 0.0650(0.1059) EVAL: [270/3883] Data 0.001 (0.003) Elapsed 0m 47s (remain 10m 38s) Loss: 0.0329(0.1076) EVAL: [288/3883] Data 0.001 (0.003) Elapsed 0m 51s (remain 10m 
35s) Loss: 0.2880(0.1093) EVAL: [306/3883] Data 0.001 (0.003) Elapsed 0m 54s (remain 10m 31s) Loss: 0.0431(0.1090) EVAL: [324/3883] Data 0.001 (0.002) Elapsed 0m 57s (remain 10m 28s) Loss: 0.0581(0.1087) EVAL: [342/3883] Data 0.001 (0.002) Elapsed 1m 0s (remain 10m 24s) Loss: 0.0061(0.1105) EVAL: [360/3883] Data 0.001 (0.002) Elapsed 1m 3s (remain 10m 21s) Loss: 0.0951(0.1105) EVAL: [378/3883] Data 0.001 (0.002) Elapsed 1m 6s (remain 10m 17s) Loss: 0.0498(0.1107) EVAL: [396/3883] Data 0.001 (0.002) Elapsed 1m 9s (remain 10m 14s) Loss: 0.2267(0.1106) EVAL: [414/3883] Data 0.001 (0.002) Elapsed 1m 13s (remain 10m 11s) Loss: 0.1190(0.1114) EVAL: [432/3883] Data 0.001 (0.002) Elapsed 1m 16s (remain 10m 7s) Loss: 0.0674(0.1116) EVAL: [450/3883] Data 0.001 (0.002) Elapsed 1m 19s (remain 10m 4s) Loss: 0.1262(0.1123) EVAL: [468/3883] Data 0.001 (0.002) Elapsed 1m 22s (remain 10m 1s) Loss: 0.0328(0.1133) EVAL: [486/3883] Data 0.001 (0.002) Elapsed 1m 25s (remain 9m 57s) Loss: 0.2744(0.1130) EVAL: [504/3883] Data 0.001 (0.002) Elapsed 1m 28s (remain 9m 54s) Loss: 0.0368(0.1134) EVAL: [522/3883] Data 0.001 (0.002) Elapsed 1m 32s (remain 9m 51s) Loss: 0.2586(0.1131) EVAL: [540/3883] Data 0.001 (0.002) Elapsed 1m 35s (remain 9m 47s) Loss: 0.0459(0.1119) EVAL: [558/3883] Data 0.001 (0.002) Elapsed 1m 38s (remain 9m 44s) Loss: 0.2911(0.1122) EVAL: [576/3883] Data 0.001 (0.002) Elapsed 1m 41s (remain 9m 41s) Loss: 0.0165(0.1115) EVAL: [594/3883] Data 0.001 (0.002) Elapsed 1m 44s (remain 9m 38s) Loss: 0.1139(0.1108) EVAL: [612/3883] Data 0.001 (0.002) Elapsed 1m 47s (remain 9m 34s) Loss: 0.0720(0.1104) EVAL: [630/3883] Data 0.001 (0.002) Elapsed 1m 50s (remain 9m 31s) Loss: 0.0230(0.1102) EVAL: [648/3883] Data 0.004 (0.002) Elapsed 1m 54s (remain 9m 28s) Loss: 0.0329(0.1102) EVAL: [666/3883] Data 0.001 (0.002) Elapsed 1m 57s (remain 9m 25s) Loss: 0.2063(0.1104) EVAL: [684/3883] Data 0.001 (0.002) Elapsed 2m 0s (remain 9m 22s) Loss: 0.1626(0.1105) EVAL: [702/3883] Data 0.002 (0.002) 
Elapsed 2m 3s (remain 9m 18s) Loss: 0.1010(0.1091) EVAL: [720/3883] Data 0.001 (0.002) Elapsed 2m 6s (remain 9m 15s) Loss: 0.0803(0.1085) EVAL: [738/3883] Data 0.001 (0.002) Elapsed 2m 9s (remain 9m 12s) Loss: 0.0033(0.1087) EVAL: [756/3883] Data 0.001 (0.002) Elapsed 2m 12s (remain 9m 9s) Loss: 0.1369(0.1088) EVAL: [774/3883] Data 0.001 (0.002) Elapsed 2m 16s (remain 9m 6s) Loss: 0.0332(0.1083) EVAL: [792/3883] Data 0.001 (0.002) Elapsed 2m 19s (remain 9m 2s) Loss: 0.1277(0.1088) EVAL: [810/3883] Data 0.001 (0.002) Elapsed 2m 22s (remain 8m 59s) Loss: 0.0129(0.1090) EVAL: [828/3883] Data 0.001 (0.002) Elapsed 2m 25s (remain 8m 56s) Loss: 0.0114(0.1091) EVAL: [846/3883] Data 0.001 (0.002) Elapsed 2m 28s (remain 8m 53s) Loss: 0.0303(0.1089) EVAL: [864/3883] Data 0.001 (0.001) Elapsed 2m 31s (remain 8m 49s) Loss: 0.2472(0.1096) EVAL: [882/3883] Data 0.001 (0.001) Elapsed 2m 35s (remain 8m 46s) Loss: 0.0071(0.1096) EVAL: [900/3883] Data 0.001 (0.001) Elapsed 2m 38s (remain 8m 43s) Loss: 0.2446(0.1092) EVAL: [918/3883] Data 0.001 (0.001) Elapsed 2m 41s (remain 8m 40s) Loss: 0.0989(0.1091) EVAL: [936/3883] Data 0.001 (0.001) Elapsed 2m 44s (remain 8m 37s) Loss: 0.2149(0.1092) EVAL: [954/3883] Data 0.001 (0.001) Elapsed 2m 47s (remain 8m 34s) Loss: 0.0410(0.1087) EVAL: [972/3883] Data 0.001 (0.001) Elapsed 2m 50s (remain 8m 30s) Loss: 0.0300(0.1089) EVAL: [990/3883] Data 0.001 (0.001) Elapsed 2m 53s (remain 8m 27s) Loss: 0.0865(0.1085) EVAL: [1008/3883] Data 0.001 (0.001) Elapsed 2m 57s (remain 8m 24s) Loss: 0.3265(0.1090) EVAL: [1026/3883] Data 0.001 (0.001) Elapsed 3m 0s (remain 8m 21s) Loss: 0.0177(0.1083) EVAL: [1044/3883] Data 0.001 (0.001) Elapsed 3m 3s (remain 8m 18s) Loss: 0.0175(0.1082) EVAL: [1062/3883] Data 0.001 (0.001) Elapsed 3m 6s (remain 8m 14s) Loss: 0.0488(0.1080) EVAL: [1080/3883] Data 0.001 (0.001) Elapsed 3m 9s (remain 8m 11s) Loss: 0.0422(0.1078) EVAL: [1098/3883] Data 0.001 (0.001) Elapsed 3m 12s (remain 8m 8s) Loss: 0.1193(0.1075) EVAL: 
[1116/3883] Data 0.001 (0.001) Elapsed 3m 16s (remain 8m 5s) Loss: 0.0648(0.1076) EVAL: [1134/3883] Data 0.001 (0.001) Elapsed 3m 19s (remain 8m 2s) Loss: 0.0074(0.1075) EVAL: [1152/3883] Data 0.001 (0.001) Elapsed 3m 22s (remain 7m 59s) Loss: 0.1423(0.1072) EVAL: [1170/3883] Data 0.002 (0.001) Elapsed 3m 25s (remain 7m 55s) Loss: 0.1163(0.1068) EVAL: [1188/3883] Data 0.001 (0.001) Elapsed 3m 28s (remain 7m 52s) Loss: 0.0859(0.1067) EVAL: [1206/3883] Data 0.001 (0.001) Elapsed 3m 31s (remain 7m 49s) Loss: 0.0192(0.1067) EVAL: [1224/3883] Data 0.001 (0.001) Elapsed 3m 34s (remain 7m 46s) Loss: 0.1777(0.1067) EVAL: [1242/3883] Data 0.001 (0.001) Elapsed 3m 38s (remain 7m 43s) Loss: 0.0875(0.1067) EVAL: [1260/3883] Data 0.001 (0.001) Elapsed 3m 41s (remain 7m 39s) Loss: 0.4459(0.1070) EVAL: [1278/3883] Data 0.001 (0.001) Elapsed 3m 44s (remain 7m 36s) Loss: 0.2281(0.1075) EVAL: [1296/3883] Data 0.001 (0.001) Elapsed 3m 47s (remain 7m 33s) Loss: 0.0093(0.1076) EVAL: [1314/3883] Data 0.001 (0.001) Elapsed 3m 50s (remain 7m 30s) Loss: 0.0282(0.1080) EVAL: [1332/3883] Data 0.001 (0.001) Elapsed 3m 53s (remain 7m 27s) Loss: 0.1142(0.1078) EVAL: [1350/3883] Data 0.001 (0.001) Elapsed 3m 56s (remain 7m 24s) Loss: 0.0175(0.1079) EVAL: [1368/3883] Data 0.001 (0.001) Elapsed 4m 0s (remain 7m 20s) Loss: 0.2056(0.1085) EVAL: [1386/3883] Data 0.001 (0.001) Elapsed 4m 3s (remain 7m 17s) Loss: 0.0719(0.1083) EVAL: [1404/3883] Data 0.001 (0.001) Elapsed 4m 6s (remain 7m 14s) Loss: 0.0568(0.1085) EVAL: [1422/3883] Data 0.001 (0.001) Elapsed 4m 9s (remain 7m 11s) Loss: 0.1032(0.1085) EVAL: [1440/3883] Data 0.001 (0.001) Elapsed 4m 12s (remain 7m 8s) Loss: 0.1024(0.1080) EVAL: [1458/3883] Data 0.001 (0.001) Elapsed 4m 15s (remain 7m 5s) Loss: 0.0351(0.1082) EVAL: [1476/3883] Data 0.001 (0.001) Elapsed 4m 19s (remain 7m 1s) Loss: 0.0687(0.1080) EVAL: [1494/3883] Data 0.001 (0.001) Elapsed 4m 22s (remain 6m 58s) Loss: 0.2244(0.1078) EVAL: [1512/3883] Data 0.001 (0.001) Elapsed 4m 25s 
(remain 6m 55s) Loss: 0.3324(0.1077) EVAL: [1530/3883] Data 0.001 (0.001) Elapsed 4m 28s (remain 6m 52s) Loss: 0.2903(0.1080) EVAL: [1548/3883] Data 0.001 (0.001) Elapsed 4m 31s (remain 6m 49s) Loss: 0.0748(0.1078) EVAL: [1566/3883] Data 0.001 (0.001) Elapsed 4m 34s (remain 6m 46s) Loss: 0.2162(0.1075) EVAL: [1584/3883] Data 0.001 (0.001) Elapsed 4m 37s (remain 6m 42s) Loss: 0.0059(0.1076) EVAL: [1602/3883] Data 0.001 (0.001) Elapsed 4m 41s (remain 6m 39s) Loss: 0.2413(0.1078) EVAL: [1620/3883] Data 0.001 (0.001) Elapsed 4m 44s (remain 6m 36s) Loss: 0.1888(0.1085) EVAL: [1638/3883] Data 0.001 (0.001) Elapsed 4m 47s (remain 6m 33s) Loss: 0.1893(0.1087) EVAL: [1656/3883] Data 0.001 (0.001) Elapsed 4m 50s (remain 6m 30s) Loss: 0.1045(0.1087) EVAL: [1674/3883] Data 0.001 (0.001) Elapsed 4m 53s (remain 6m 27s) Loss: 0.2248(0.1088) EVAL: [1692/3883] Data 0.001 (0.001) Elapsed 4m 56s (remain 6m 23s) Loss: 0.0153(0.1086) EVAL: [1710/3883] Data 0.001 (0.001) Elapsed 4m 59s (remain 6m 20s) Loss: 0.0612(0.1089) EVAL: [1728/3883] Data 0.001 (0.001) Elapsed 5m 3s (remain 6m 17s) Loss: 0.0357(0.1089) EVAL: [1746/3883] Data 0.001 (0.001) Elapsed 5m 6s (remain 6m 14s) Loss: 0.1038(0.1088) EVAL: [1764/3883] Data 0.001 (0.001) Elapsed 5m 9s (remain 6m 11s) Loss: 0.1522(0.1092) EVAL: [1782/3883] Data 0.001 (0.001) Elapsed 5m 12s (remain 6m 8s) Loss: 0.4192(0.1093) EVAL: [1800/3883] Data 0.001 (0.001) Elapsed 5m 15s (remain 6m 4s) Loss: 0.1741(0.1093) EVAL: [1818/3883] Data 0.001 (0.001) Elapsed 5m 18s (remain 6m 1s) Loss: 0.0129(0.1093) EVAL: [1836/3883] Data 0.001 (0.001) Elapsed 5m 22s (remain 5m 58s) Loss: 0.0317(0.1095) EVAL: [1854/3883] Data 0.001 (0.001) Elapsed 5m 25s (remain 5m 55s) Loss: 0.0032(0.1097) EVAL: [1872/3883] Data 0.001 (0.001) Elapsed 5m 28s (remain 5m 52s) Loss: 0.2144(0.1097) EVAL: [1890/3883] Data 0.001 (0.001) Elapsed 5m 31s (remain 5m 49s) Loss: 0.0181(0.1097) EVAL: [1908/3883] Data 0.001 (0.001) Elapsed 5m 34s (remain 5m 46s) Loss: 0.0809(0.1097) EVAL: 
[1926/3883] Data 0.001 (0.001) Elapsed 5m 37s (remain 5m 42s) Loss: 0.0093(0.1096) EVAL: [1944/3883] Data 0.001 (0.001) Elapsed 5m 40s (remain 5m 39s) Loss: 0.0068(0.1098) EVAL: [1962/3883] Data 0.002 (0.001) Elapsed 5m 44s (remain 5m 36s) Loss: 0.1869(0.1101) EVAL: [1980/3883] Data 0.001 (0.001) Elapsed 5m 47s (remain 5m 33s) Loss: 0.1879(0.1101) EVAL: [1998/3883] Data 0.001 (0.001) Elapsed 5m 50s (remain 5m 30s) Loss: 0.1670(0.1102) EVAL: [2016/3883] Data 0.001 (0.001) Elapsed 5m 53s (remain 5m 27s) Loss: 0.0117(0.1105) EVAL: [2034/3883] Data 0.001 (0.001) Elapsed 5m 56s (remain 5m 23s) Loss: 0.0180(0.1103) EVAL: [2052/3883] Data 0.001 (0.001) Elapsed 5m 59s (remain 5m 20s) Loss: 0.2760(0.1102) EVAL: [2070/3883] Data 0.001 (0.001) Elapsed 6m 2s (remain 5m 17s) Loss: 0.2451(0.1105) EVAL: [2088/3883] Data 0.001 (0.001) Elapsed 6m 6s (remain 5m 14s) Loss: 0.0162(0.1102) EVAL: [2106/3883] Data 0.002 (0.001) Elapsed 6m 9s (remain 5m 11s) Loss: 0.0824(0.1102) EVAL: [2124/3883] Data 0.001 (0.001) Elapsed 6m 12s (remain 5m 8s) Loss: 0.0518(0.1098) EVAL: [2142/3883] Data 0.001 (0.001) Elapsed 6m 15s (remain 5m 4s) Loss: 0.0589(0.1097) EVAL: [2160/3883] Data 0.001 (0.001) Elapsed 6m 18s (remain 5m 1s) Loss: 0.0705(0.1094) EVAL: [2178/3883] Data 0.001 (0.001) Elapsed 6m 21s (remain 4m 58s) Loss: 0.0060(0.1091) EVAL: [2196/3883] Data 0.001 (0.001) Elapsed 6m 25s (remain 4m 55s) Loss: 0.0325(0.1090) EVAL: [2214/3883] Data 0.001 (0.001) Elapsed 6m 28s (remain 4m 52s) Loss: 0.0705(0.1089) EVAL: [2232/3883] Data 0.001 (0.001) Elapsed 6m 31s (remain 4m 49s) Loss: 0.1807(0.1091) EVAL: [2250/3883] Data 0.001 (0.001) Elapsed 6m 34s (remain 4m 45s) Loss: 0.4865(0.1092) EVAL: [2268/3883] Data 0.001 (0.001) Elapsed 6m 37s (remain 4m 42s) Loss: 0.0506(0.1094) EVAL: [2286/3883] Data 0.001 (0.001) Elapsed 6m 40s (remain 4m 39s) Loss: 0.1842(0.1095) EVAL: [2304/3883] Data 0.001 (0.001) Elapsed 6m 43s (remain 4m 36s) Loss: 0.1605(0.1093) EVAL: [2322/3883] Data 0.001 (0.001) Elapsed 6m 47s 
(remain 4m 33s) Loss: 0.0077(0.1092) EVAL: [2340/3883] Data 0.001 (0.001) Elapsed 6m 50s (remain 4m 30s) Loss: 0.0990(0.1092) EVAL: [2358/3883] Data 0.001 (0.001) Elapsed 6m 53s (remain 4m 27s) Loss: 0.2900(0.1091) EVAL: [2376/3883] Data 0.001 (0.001) Elapsed 6m 56s (remain 4m 23s) Loss: 0.2033(0.1093) EVAL: [2394/3883] Data 0.001 (0.001) Elapsed 6m 59s (remain 4m 20s) Loss: 0.4054(0.1092) EVAL: [2412/3883] Data 0.002 (0.001) Elapsed 7m 2s (remain 4m 17s) Loss: 0.1463(0.1091) EVAL: [2430/3883] Data 0.001 (0.001) Elapsed 7m 5s (remain 4m 14s) Loss: 0.0300(0.1090) EVAL: [2448/3883] Data 0.001 (0.001) Elapsed 7m 9s (remain 4m 11s) Loss: 0.0708(0.1087) EVAL: [2466/3883] Data 0.001 (0.001) Elapsed 7m 12s (remain 4m 8s) Loss: 0.0738(0.1086) EVAL: [2484/3883] Data 0.001 (0.001) Elapsed 7m 15s (remain 4m 4s) Loss: 0.2503(0.1087) EVAL: [2502/3883] Data 0.001 (0.001) Elapsed 7m 18s (remain 4m 1s) Loss: 0.0470(0.1088) EVAL: [2520/3883] Data 0.001 (0.001) Elapsed 7m 21s (remain 3m 58s) Loss: 0.0241(0.1085) EVAL: [2538/3883] Data 0.001 (0.001) Elapsed 7m 24s (remain 3m 55s) Loss: 0.0569(0.1089) EVAL: [2556/3883] Data 0.001 (0.001) Elapsed 7m 28s (remain 3m 52s) Loss: 0.4817(0.1088) EVAL: [2574/3883] Data 0.002 (0.001) Elapsed 7m 31s (remain 3m 49s) Loss: 0.5602(0.1089) EVAL: [2592/3883] Data 0.001 (0.001) Elapsed 7m 34s (remain 3m 46s) Loss: 0.0343(0.1090) EVAL: [2610/3883] Data 0.001 (0.001) Elapsed 7m 37s (remain 3m 42s) Loss: 0.1469(0.1093) EVAL: [2628/3883] Data 0.001 (0.001) Elapsed 7m 40s (remain 3m 39s) Loss: 0.0355(0.1094) EVAL: [2646/3883] Data 0.001 (0.001) Elapsed 7m 43s (remain 3m 36s) Loss: 0.1739(0.1093) EVAL: [2664/3883] Data 0.001 (0.001) Elapsed 7m 46s (remain 3m 33s) Loss: 0.0464(0.1092) EVAL: [2682/3883] Data 0.001 (0.001) Elapsed 7m 50s (remain 3m 30s) Loss: 0.1367(0.1091) EVAL: [2700/3883] Data 0.001 (0.001) Elapsed 7m 53s (remain 3m 27s) Loss: 0.0510(0.1091) EVAL: [2718/3883] Data 0.001 (0.001) Elapsed 7m 56s (remain 3m 23s) Loss: 0.1695(0.1091) EVAL: 
[2736/3883] Data 0.001 (0.001) Elapsed 7m 59s (remain 3m 20s) Loss: 0.0232(0.1092) EVAL: [2754/3883] Data 0.001 (0.001) Elapsed 8m 2s (remain 3m 17s) Loss: 0.0609(0.1096) EVAL: [2772/3883] Data 0.001 (0.001) Elapsed 8m 5s (remain 3m 14s) Loss: 0.1228(0.1098) EVAL: [2790/3883] Data 0.001 (0.001) Elapsed 8m 8s (remain 3m 11s) Loss: 0.0179(0.1097) EVAL: [2808/3883] Data 0.001 (0.001) Elapsed 8m 12s (remain 3m 8s) Loss: 0.1971(0.1097) EVAL: [2826/3883] Data 0.001 (0.001) Elapsed 8m 15s (remain 3m 5s) Loss: 0.1124(0.1096) EVAL: [2844/3883] Data 0.001 (0.001) Elapsed 8m 18s (remain 3m 1s) Loss: 0.1134(0.1096) EVAL: [2862/3883] Data 0.001 (0.001) Elapsed 8m 21s (remain 2m 58s) Loss: 0.2529(0.1095) EVAL: [2880/3883] Data 0.001 (0.001) Elapsed 8m 24s (remain 2m 55s) Loss: 0.0116(0.1094) EVAL: [2898/3883] Data 0.001 (0.001) Elapsed 8m 27s (remain 2m 52s) Loss: 0.2290(0.1094) EVAL: [2916/3883] Data 0.001 (0.001) Elapsed 8m 31s (remain 2m 49s) Loss: 0.1004(0.1093) EVAL: [2934/3883] Data 0.001 (0.001) Elapsed 8m 34s (remain 2m 46s) Loss: 0.2913(0.1093) EVAL: [2952/3883] Data 0.001 (0.001) Elapsed 8m 37s (remain 2m 42s) Loss: 0.0115(0.1093) EVAL: [2970/3883] Data 0.001 (0.001) Elapsed 8m 40s (remain 2m 39s) Loss: 0.0395(0.1093) EVAL: [2988/3883] Data 0.001 (0.001) Elapsed 8m 43s (remain 2m 36s) Loss: 0.0544(0.1094) EVAL: [3006/3883] Data 0.002 (0.001) Elapsed 8m 46s (remain 2m 33s) Loss: 0.0600(0.1093) EVAL: [3024/3883] Data 0.002 (0.001) Elapsed 8m 49s (remain 2m 30s) Loss: 0.3256(0.1093) EVAL: [3042/3883] Data 0.001 (0.001) Elapsed 8m 53s (remain 2m 27s) Loss: 0.0303(0.1092) EVAL: [3060/3883] Data 0.001 (0.001) Elapsed 8m 56s (remain 2m 24s) Loss: 0.1436(0.1093) EVAL: [3078/3883] Data 0.001 (0.001) Elapsed 8m 59s (remain 2m 20s) Loss: 0.1625(0.1092) EVAL: [3096/3883] Data 0.001 (0.001) Elapsed 9m 2s (remain 2m 17s) Loss: 0.5303(0.1093) EVAL: [3114/3883] Data 0.001 (0.001) Elapsed 9m 5s (remain 2m 14s) Loss: 0.1072(0.1093) EVAL: [3132/3883] Data 0.001 (0.001) Elapsed 9m 8s 
(remain 2m 11s) Loss: 0.0590(0.1092) EVAL: [3150/3883] Data 0.001 (0.001) Elapsed 9m 12s (remain 2m 8s) Loss: 0.0626(0.1091) EVAL: [3168/3883] Data 0.001 (0.001) Elapsed 9m 15s (remain 2m 5s) Loss: 0.2574(0.1090) EVAL: [3186/3883] Data 0.001 (0.001) Elapsed 9m 18s (remain 2m 1s) Loss: 0.0060(0.1087) EVAL: [3204/3883] Data 0.001 (0.001) Elapsed 9m 21s (remain 1m 58s) Loss: 0.0276(0.1087) EVAL: [3222/3883] Data 0.001 (0.001) Elapsed 9m 24s (remain 1m 55s) Loss: 0.2485(0.1087) EVAL: [3240/3883] Data 0.001 (0.001) Elapsed 9m 27s (remain 1m 52s) Loss: 0.4306(0.1087) EVAL: [3258/3883] Data 0.001 (0.001) Elapsed 9m 30s (remain 1m 49s) Loss: 0.0181(0.1087) EVAL: [3276/3883] Data 0.002 (0.001) Elapsed 9m 34s (remain 1m 46s) Loss: 0.0693(0.1087) EVAL: [3294/3883] Data 0.001 (0.001) Elapsed 9m 37s (remain 1m 43s) Loss: 0.1603(0.1087) EVAL: [3312/3883] Data 0.001 (0.001) Elapsed 9m 40s (remain 1m 39s) Loss: 0.0044(0.1088) EVAL: [3330/3883] Data 0.001 (0.001) Elapsed 9m 43s (remain 1m 36s) Loss: 0.2515(0.1086) EVAL: [3348/3883] Data 0.001 (0.001) Elapsed 9m 46s (remain 1m 33s) Loss: 0.0375(0.1087) EVAL: [3366/3883] Data 0.001 (0.001) Elapsed 9m 49s (remain 1m 30s) Loss: 0.0079(0.1086) EVAL: [3384/3883] Data 0.001 (0.001) Elapsed 9m 52s (remain 1m 27s) Loss: 0.1152(0.1087) EVAL: [3402/3883] Data 0.001 (0.001) Elapsed 9m 56s (remain 1m 24s) Loss: 0.1709(0.1088) EVAL: [3420/3883] Data 0.001 (0.001) Elapsed 9m 59s (remain 1m 20s) Loss: 0.1538(0.1087) EVAL: [3438/3883] Data 0.001 (0.001) Elapsed 10m 2s (remain 1m 17s) Loss: 0.1839(0.1089) EVAL: [3456/3883] Data 0.001 (0.001) Elapsed 10m 5s (remain 1m 14s) Loss: 0.2720(0.1090) EVAL: [3474/3883] Data 0.001 (0.001) Elapsed 10m 8s (remain 1m 11s) Loss: 0.0215(0.1091) EVAL: [3492/3883] Data 0.001 (0.001) Elapsed 10m 11s (remain 1m 8s) Loss: 0.1157(0.1091) EVAL: [3510/3883] Data 0.001 (0.001) Elapsed 10m 15s (remain 1m 5s) Loss: 0.0410(0.1091) EVAL: [3528/3883] Data 0.001 (0.001) Elapsed 10m 18s (remain 1m 2s) Loss: 0.6066(0.1093) EVAL: 
[3546/3883] Data 0.002 (0.001) Elapsed 10m 21s (remain 0m 58s) Loss: 0.2360(0.1093) EVAL: [3564/3883] Data 0.001 (0.001) Elapsed 10m 24s (remain 0m 55s) Loss: 0.1142(0.1091) EVAL: [3582/3883] Data 0.001 (0.001) Elapsed 10m 27s (remain 0m 52s) Loss: 0.0693(0.1092) EVAL: [3600/3883] Data 0.001 (0.001) Elapsed 10m 30s (remain 0m 49s) Loss: 0.0602(0.1091) EVAL: [3618/3883] Data 0.001 (0.001) Elapsed 10m 33s (remain 0m 46s) Loss: 0.1459(0.1091) EVAL: [3636/3883] Data 0.001 (0.001) Elapsed 10m 37s (remain 0m 43s) Loss: 0.0206(0.1092) EVAL: [3654/3883] Data 0.001 (0.001) Elapsed 10m 40s (remain 0m 39s) Loss: 0.0406(0.1091) EVAL: [3672/3883] Data 0.001 (0.001) Elapsed 10m 43s (remain 0m 36s) Loss: 0.4368(0.1092) EVAL: [3690/3883] Data 0.001 (0.001) Elapsed 10m 46s (remain 0m 33s) Loss: 0.0235(0.1094) EVAL: [3708/3883] Data 0.001 (0.001) Elapsed 10m 49s (remain 0m 30s) Loss: 0.0338(0.1095) EVAL: [3726/3883] Data 0.001 (0.001) Elapsed 10m 52s (remain 0m 27s) Loss: 0.1619(0.1095) EVAL: [3744/3883] Data 0.001 (0.001) Elapsed 10m 55s (remain 0m 24s) Loss: 0.1163(0.1095) EVAL: [3762/3883] Data 0.002 (0.001) Elapsed 10m 59s (remain 0m 21s) Loss: 0.0237(0.1095) EVAL: [3780/3883] Data 0.001 (0.001) Elapsed 11m 2s (remain 0m 17s) Loss: 0.0099(0.1094) EVAL: [3798/3883] Data 0.001 (0.001) Elapsed 11m 5s (remain 0m 14s) Loss: 0.0051(0.1095) EVAL: [3816/3883] Data 0.001 (0.001) Elapsed 11m 8s (remain 0m 11s) Loss: 0.0120(0.1095) EVAL: [3834/3883] Data 0.001 (0.001) Elapsed 11m 11s (remain 0m 8s) Loss: 0.0232(0.1096) EVAL: [3852/3883] Data 0.001 (0.001) Elapsed 11m 14s (remain 0m 5s) Loss: 0.2171(0.1095) EVAL: [3870/3883] Data 0.001 (0.001) Elapsed 11m 18s (remain 0m 2s) Loss: 0.0321(0.1093)
Epoch 9 - avg_train_loss: 0.2629 avg_val_loss: 0.1094 time: 1126s Epoch 9 - AUC: 0.9662948164378854 - pAUC: 0.17790337479323745
EVAL: [3882/3883] Data 0.001 (0.001) Elapsed 11m 19s (remain 0m 0s) Loss: 0.2948(0.1094) Epoch: [10][0/650] Data 0.854 (0.854) Elapsed 0m 1s (remain 14m 4s) Loss: 0.1837(0.1837) Grad: 6.7328 LR: 0.000003 Epoch: [10][18/650] Data 0.271 (0.294) Elapsed 0m 13s (remain 7m 31s) Loss: 0.2002(0.2240) Grad: 6.9892 LR: 0.000003 Epoch: [10][36/650] Data 0.264 (0.281) Elapsed 0m 25s (remain 7m 9s) Loss: 0.3803(0.2699) Grad: 13.9729 LR: 0.000003 Epoch: [10][54/650] Data 0.251 (0.276) Elapsed 0m 38s (remain 6m 53s) Loss: 0.2303(0.2602) Grad: 7.6408 LR: 0.000003 Epoch: [10][72/650] Data 0.257 (0.274) Elapsed 0m 50s (remain 6m 39s) Loss: 0.2180(0.2649) Grad: 5.9644 LR: 0.000003 Epoch: [10][90/650] Data 0.263 (0.272) Elapsed 1m 2s (remain 6m 25s) Loss: 0.4937(0.2756) Grad: 14.5392 LR: 0.000003 Epoch: [10][108/650] Data 0.272 (0.271) Elapsed 1m 15s (remain 6m 12s) Loss: 0.4090(0.2763) Grad: 11.8595 LR: 0.000003 Epoch: [10][126/650] Data 0.270 (0.270) Elapsed 1m 27s (remain 6m 0s) Loss: 0.5089(0.2758) Grad: 11.9833 LR: 0.000003 Epoch: [10][144/650] Data 0.267 (0.270) Elapsed 1m 39s (remain 5m 47s) Loss: 0.2551(0.2664) Grad: 6.8427 LR: 0.000003 Epoch: [10][162/650] Data 0.271 (0.269) Elapsed 1m 52s (remain 5m 34s) Loss: 0.2661(0.2622) Grad: 8.2841 LR: 0.000003 Epoch: [10][180/650] Data 0.272 (0.269) Elapsed 2m 4s (remain 5m 22s) Loss: 0.3128(0.2685) Grad: 7.1546 LR: 0.000003 Epoch: [10][198/650] Data 0.271 (0.269) Elapsed 2m 16s (remain 5m 9s) Loss: 0.1877(0.2700) Grad: 6.6630 LR: 0.000003 Epoch: [10][216/650] Data 0.257 (0.268) Elapsed 2m 28s (remain 4m 57s) Loss: 0.3193(0.2698) Grad: 6.1477 LR: 0.000003 Epoch: [10][234/650] Data 0.268 (0.268) Elapsed 2m 41s (remain 4m 44s) Loss: 0.2159(0.2685) Grad: 7.3384 LR: 0.000003 Epoch: [10][252/650] Data 0.267 (0.268) Elapsed 2m 53s (remain 4m 32s) Loss: 0.1873(0.2712) Grad: 7.7811 LR: 0.000003 Epoch: [10][270/650] Data 0.271 (0.268) Elapsed 3m 5s (remain 4m 19s) Loss: 0.1239(0.2689) Grad: 4.2989 LR: 0.000003 Epoch: [10][288/650] Data 0.259 
(0.268) Elapsed 3m 18s (remain 4m 7s) Loss: 0.3556(0.2704) Grad: 9.7239 LR: 0.000003 Epoch: [10][306/650] Data 0.266 (0.268) Elapsed 3m 30s (remain 3m 55s) Loss: 0.2081(0.2697) Grad: 9.5482 LR: 0.000003 Epoch: [10][324/650] Data 0.265 (0.268) Elapsed 3m 42s (remain 3m 42s) Loss: 0.2062(0.2694) Grad: 7.4909 LR: 0.000003 Epoch: [10][342/650] Data 0.258 (0.268) Elapsed 3m 55s (remain 3m 30s) Loss: 0.2902(0.2683) Grad: 10.9024 LR: 0.000003 Epoch: [10][360/650] Data 0.271 (0.268) Elapsed 4m 7s (remain 3m 18s) Loss: 0.1554(0.2654) Grad: 5.2656 LR: 0.000003 Epoch: [10][378/650] Data 0.272 (0.268) Elapsed 4m 19s (remain 3m 5s) Loss: 0.2024(0.2630) Grad: 6.0196 LR: 0.000003 Epoch: [10][396/650] Data 0.272 (0.268) Elapsed 4m 32s (remain 2m 53s) Loss: 0.1046(0.2607) Grad: 5.0755 LR: 0.000003 Epoch: [10][414/650] Data 0.266 (0.267) Elapsed 4m 44s (remain 2m 41s) Loss: 0.2592(0.2574) Grad: 8.2319 LR: 0.000003 Epoch: [10][432/650] Data 0.263 (0.267) Elapsed 4m 56s (remain 2m 28s) Loss: 0.2443(0.2572) Grad: 8.5354 LR: 0.000003 Epoch: [10][450/650] Data 0.268 (0.267) Elapsed 5m 9s (remain 2m 16s) Loss: 0.2911(0.2605) Grad: 7.7450 LR: 0.000003 Epoch: [10][468/650] Data 0.271 (0.267) Elapsed 5m 21s (remain 2m 4s) Loss: 0.1777(0.2609) Grad: 9.2352 LR: 0.000003 Epoch: [10][486/650] Data 0.254 (0.267) Elapsed 5m 33s (remain 1m 51s) Loss: 0.2057(0.2605) Grad: 6.2431 LR: 0.000003 Epoch: [10][504/650] Data 0.265 (0.267) Elapsed 5m 45s (remain 1m 39s) Loss: 0.1882(0.2598) Grad: 6.3767 LR: 0.000003 Epoch: [10][522/650] Data 0.272 (0.267) Elapsed 5m 58s (remain 1m 26s) Loss: 0.0920(0.2601) Grad: 3.5909 LR: 0.000003 Epoch: [10][540/650] Data 0.270 (0.267) Elapsed 6m 10s (remain 1m 14s) Loss: 0.3816(0.2606) Grad: 10.6286 LR: 0.000003 Epoch: [10][558/650] Data 0.262 (0.267) Elapsed 6m 22s (remain 1m 2s) Loss: 0.2080(0.2614) Grad: 6.6038 LR: 0.000003 Epoch: [10][576/650] Data 0.272 (0.267) Elapsed 6m 35s (remain 0m 49s) Loss: 0.2094(0.2617) Grad: 6.9838 LR: 0.000003 Epoch: [10][594/650] Data 
0.264 (0.267) Elapsed 6m 47s (remain 0m 37s) Loss: 0.2218(0.2617) Grad: 10.5431 LR: 0.000003 Epoch: [10][612/650] Data 0.267 (0.267) Elapsed 6m 59s (remain 0m 25s) Loss: 0.1747(0.2620) Grad: 5.9795 LR: 0.000003 Epoch: [10][630/650] Data 0.271 (0.267) Elapsed 7m 12s (remain 0m 13s) Loss: 0.5635(0.2626) Grad: 16.3536 LR: 0.000003 Epoch: [10][648/650] Data 0.272 (0.267) Elapsed 7m 24s (remain 0m 0s) Loss: 0.1982(0.2622) Grad: 6.5250 LR: 0.000003 Epoch: [10][649/650] Data 0.273 (0.267) Elapsed 7m 25s (remain 0m 0s) Loss: 0.3294(0.2623) Grad: 10.0549 LR: 0.000003 EVAL: [0/3883] Data 0.629 (0.629) Elapsed 0m 0s (remain 52m 12s) Loss: 0.1193(0.1193) EVAL: [18/3883] Data 0.001 (0.034) Elapsed 0m 3s (remain 13m 25s) Loss: 0.0927(0.0864) EVAL: [36/3883] Data 0.001 (0.018) Elapsed 0m 7s (remain 12m 19s) Loss: 0.0558(0.1018) EVAL: [54/3883] Data 0.001 (0.012) Elapsed 0m 10s (remain 11m 54s) Loss: 0.0136(0.1002) EVAL: [72/3883] Data 0.001 (0.010) Elapsed 0m 13s (remain 11m 40s) Loss: 0.2478(0.1131) EVAL: [90/3883] Data 0.002 (0.008) Elapsed 0m 16s (remain 11m 30s) Loss: 0.2071(0.1166) EVAL: [108/3883] Data 0.001 (0.007) Elapsed 0m 19s (remain 11m 22s) Loss: 0.1652(0.1190) EVAL: [126/3883] Data 0.001 (0.006) Elapsed 0m 22s (remain 11m 16s) Loss: 0.3295(0.1222) EVAL: [144/3883] Data 0.001 (0.005) Elapsed 0m 26s (remain 11m 10s) Loss: 0.1643(0.1177) EVAL: [162/3883] Data 0.001 (0.005) Elapsed 0m 29s (remain 11m 5s) Loss: 0.0381(0.1184) EVAL: [180/3883] Data 0.001 (0.004) Elapsed 0m 32s (remain 11m 1s) Loss: 0.0082(0.1154) EVAL: [198/3883] Data 0.001 (0.004) Elapsed 0m 35s (remain 10m 56s) Loss: 0.0300(0.1188) EVAL: [216/3883] Data 0.001 (0.004) Elapsed 0m 38s (remain 10m 52s) Loss: 0.0505(0.1194) EVAL: [234/3883] Data 0.001 (0.004) Elapsed 0m 41s (remain 10m 48s) Loss: 0.1241(0.1190) EVAL: [252/3883] Data 0.001 (0.003) Elapsed 0m 44s (remain 10m 44s) Loss: 0.0723(0.1161) EVAL: [270/3883] Data 0.001 (0.003) Elapsed 0m 48s (remain 10m 40s) Loss: 0.0485(0.1182) EVAL: [288/3883] Data 
0.001 (0.003) Elapsed 0m 51s (remain 10m 37s) Loss: 0.3243(0.1197) EVAL: [306/3883] Data 0.001 (0.003) Elapsed 0m 54s (remain 10m 33s) Loss: 0.0484(0.1190) EVAL: [324/3883] Data 0.001 (0.003) Elapsed 0m 57s (remain 10m 29s) Loss: 0.0660(0.1188) EVAL: [342/3883] Data 0.001 (0.003) Elapsed 1m 0s (remain 10m 26s) Loss: 0.0069(0.1206) EVAL: [360/3883] Data 0.001 (0.003) Elapsed 1m 3s (remain 10m 22s) Loss: 0.1103(0.1205) EVAL: [378/3883] Data 0.001 (0.003) Elapsed 1m 6s (remain 10m 19s) Loss: 0.0549(0.1208) EVAL: [396/3883] Data 0.001 (0.003) Elapsed 1m 10s (remain 10m 15s) Loss: 0.2493(0.1207) EVAL: [414/3883] Data 0.001 (0.002) Elapsed 1m 13s (remain 10m 12s) Loss: 0.1204(0.1214) EVAL: [432/3883] Data 0.001 (0.002) Elapsed 1m 16s (remain 10m 9s) Loss: 0.0679(0.1215) EVAL: [450/3883] Data 0.001 (0.002) Elapsed 1m 19s (remain 10m 5s) Loss: 0.1444(0.1223) EVAL: [468/3883] Data 0.001 (0.002) Elapsed 1m 22s (remain 10m 2s) Loss: 0.0361(0.1234) EVAL: [486/3883] Data 0.001 (0.002) Elapsed 1m 25s (remain 9m 58s) Loss: 0.2899(0.1232) EVAL: [504/3883] Data 0.001 (0.002) Elapsed 1m 29s (remain 9m 55s) Loss: 0.0417(0.1238) EVAL: [522/3883] Data 0.001 (0.002) Elapsed 1m 32s (remain 9m 52s) Loss: 0.2516(0.1234) EVAL: [540/3883] Data 0.001 (0.002) Elapsed 1m 35s (remain 9m 48s) Loss: 0.0489(0.1222) EVAL: [558/3883] Data 0.001 (0.002) Elapsed 1m 38s (remain 9m 45s) Loss: 0.3048(0.1225) EVAL: [576/3883] Data 0.001 (0.002) Elapsed 1m 41s (remain 9m 42s) Loss: 0.0170(0.1217) EVAL: [594/3883] Data 0.001 (0.002) Elapsed 1m 44s (remain 9m 39s) Loss: 0.1348(0.1210) EVAL: [612/3883] Data 0.001 (0.002) Elapsed 1m 47s (remain 9m 35s) Loss: 0.0633(0.1207) EVAL: [630/3883] Data 0.001 (0.002) Elapsed 1m 51s (remain 9m 32s) Loss: 0.0349(0.1206) EVAL: [648/3883] Data 0.001 (0.002) Elapsed 1m 54s (remain 9m 29s) Loss: 0.0409(0.1206) EVAL: [666/3883] Data 0.001 (0.002) Elapsed 1m 57s (remain 9m 26s) Loss: 0.2241(0.1206) EVAL: [684/3883] Data 0.001 (0.002) Elapsed 2m 0s (remain 9m 22s) Loss: 
0.1897(0.1207) EVAL: [702/3883] Data 0.001 (0.002) Elapsed 2m 3s (remain 9m 19s) Loss: 0.1043(0.1192) EVAL: [720/3883] Data 0.001 (0.002) Elapsed 2m 6s (remain 9m 16s) Loss: 0.0915(0.1186) EVAL: [738/3883] Data 0.001 (0.002) Elapsed 2m 9s (remain 9m 13s) Loss: 0.0054(0.1189) EVAL: [756/3883] Data 0.001 (0.002) Elapsed 2m 13s (remain 9m 9s) Loss: 0.1322(0.1190) EVAL: [774/3883] Data 0.002 (0.002) Elapsed 2m 16s (remain 9m 6s) Loss: 0.0417(0.1185) EVAL: [792/3883] Data 0.001 (0.002) Elapsed 2m 19s (remain 9m 3s) Loss: 0.1381(0.1190) EVAL: [810/3883] Data 0.001 (0.002) Elapsed 2m 22s (remain 9m 0s) Loss: 0.0174(0.1192) EVAL: [828/3883] Data 0.001 (0.002) Elapsed 2m 25s (remain 8m 56s) Loss: 0.0101(0.1194) EVAL: [846/3883] Data 0.001 (0.002) Elapsed 2m 28s (remain 8m 53s) Loss: 0.0361(0.1192) EVAL: [864/3883] Data 0.001 (0.002) Elapsed 2m 32s (remain 8m 50s) Loss: 0.2662(0.1200) EVAL: [882/3883] Data 0.001 (0.002) Elapsed 2m 35s (remain 8m 47s) Loss: 0.0053(0.1199) EVAL: [900/3883] Data 0.001 (0.002) Elapsed 2m 38s (remain 8m 44s) Loss: 0.2504(0.1194) EVAL: [918/3883] Data 0.001 (0.002) Elapsed 2m 41s (remain 8m 40s) Loss: 0.1164(0.1193) EVAL: [936/3883] Data 0.001 (0.002) Elapsed 2m 44s (remain 8m 37s) Loss: 0.2322(0.1194) EVAL: [954/3883] Data 0.001 (0.002) Elapsed 2m 47s (remain 8m 34s) Loss: 0.0407(0.1189) EVAL: [972/3883] Data 0.001 (0.002) Elapsed 2m 50s (remain 8m 31s) Loss: 0.0510(0.1191) EVAL: [990/3883] Data 0.001 (0.002) Elapsed 2m 54s (remain 8m 28s) Loss: 0.0965(0.1186) EVAL: [1008/3883] Data 0.001 (0.002) Elapsed 2m 57s (remain 8m 24s) Loss: 0.3469(0.1192) EVAL: [1026/3883] Data 0.001 (0.002) Elapsed 3m 0s (remain 8m 21s) Loss: 0.0226(0.1186) EVAL: [1044/3883] Data 0.001 (0.002) Elapsed 3m 3s (remain 8m 18s) Loss: 0.0166(0.1185) EVAL: [1062/3883] Data 0.001 (0.002) Elapsed 3m 6s (remain 8m 15s) Loss: 0.0777(0.1182) EVAL: [1080/3883] Data 0.001 (0.001) Elapsed 3m 9s (remain 8m 12s) Loss: 0.0513(0.1180) EVAL: [1098/3883] Data 0.001 (0.001) Elapsed 3m 12s 
(remain 8m 8s) Loss: 0.1287(0.1176) EVAL: [1116/3883] Data 0.001 (0.001) Elapsed 3m 16s (remain 8m 5s) Loss: 0.0837(0.1177) EVAL: [1134/3883] Data 0.001 (0.001) Elapsed 3m 19s (remain 8m 2s) Loss: 0.0082(0.1176) EVAL: [1152/3883] Data 0.001 (0.001) Elapsed 3m 22s (remain 7m 59s) Loss: 0.1557(0.1174) EVAL: [1170/3883] Data 0.001 (0.001) Elapsed 3m 25s (remain 7m 56s) Loss: 0.1254(0.1170) EVAL: [1188/3883] Data 0.001 (0.001) Elapsed 3m 28s (remain 7m 52s) Loss: 0.1065(0.1168) EVAL: [1206/3883] Data 0.001 (0.001) Elapsed 3m 31s (remain 7m 49s) Loss: 0.0211(0.1167) EVAL: [1224/3883] Data 0.001 (0.001) Elapsed 3m 35s (remain 7m 46s) Loss: 0.1735(0.1167) EVAL: [1242/3883] Data 0.001 (0.001) Elapsed 3m 38s (remain 7m 43s) Loss: 0.1099(0.1168) EVAL: [1260/3883] Data 0.001 (0.001) Elapsed 3m 41s (remain 7m 40s) Loss: 0.4532(0.1171) EVAL: [1278/3883] Data 0.001 (0.001) Elapsed 3m 44s (remain 7m 37s) Loss: 0.2501(0.1176) EVAL: [1296/3883] Data 0.001 (0.001) Elapsed 3m 47s (remain 7m 33s) Loss: 0.0106(0.1177) EVAL: [1314/3883] Data 0.001 (0.001) Elapsed 3m 50s (remain 7m 30s) Loss: 0.0335(0.1180) EVAL: [1332/3883] Data 0.001 (0.001) Elapsed 3m 53s (remain 7m 27s) Loss: 0.1450(0.1179) EVAL: [1350/3883] Data 0.001 (0.001) Elapsed 3m 57s (remain 7m 24s) Loss: 0.0164(0.1179) EVAL: [1368/3883] Data 0.001 (0.001) Elapsed 4m 0s (remain 7m 21s) Loss: 0.2317(0.1186) EVAL: [1386/3883] Data 0.001 (0.001) Elapsed 4m 3s (remain 7m 18s) Loss: 0.0773(0.1184) EVAL: [1404/3883] Data 0.001 (0.001) Elapsed 4m 6s (remain 7m 14s) Loss: 0.0714(0.1185) EVAL: [1422/3883] Data 0.001 (0.001) Elapsed 4m 9s (remain 7m 11s) Loss: 0.1176(0.1186) EVAL: [1440/3883] Data 0.001 (0.001) Elapsed 4m 12s (remain 7m 8s) Loss: 0.1125(0.1181) EVAL: [1458/3883] Data 0.001 (0.001) Elapsed 4m 16s (remain 7m 5s) Loss: 0.0483(0.1182) EVAL: [1476/3883] Data 0.001 (0.001) Elapsed 4m 19s (remain 7m 2s) Loss: 0.0728(0.1180) EVAL: [1494/3883] Data 0.001 (0.001) Elapsed 4m 22s (remain 6m 58s) Loss: 0.2303(0.1178) EVAL: 
[1512/3883] Data 0.001 (0.001) Elapsed 4m 25s (remain 6m 55s) Loss: 0.3369(0.1178) EVAL: [1530/3883] Data 0.001 (0.001) Elapsed 4m 28s (remain 6m 52s) Loss: 0.3106(0.1181) EVAL: [1548/3883] Data 0.002 (0.001) Elapsed 4m 31s (remain 6m 49s) Loss: 0.0873(0.1178) EVAL: [1566/3883] Data 0.001 (0.001) Elapsed 4m 34s (remain 6m 46s) Loss: 0.2376(0.1176) EVAL: [1584/3883] Data 0.001 (0.001) Elapsed 4m 38s (remain 6m 43s) Loss: 0.0064(0.1176) EVAL: [1602/3883] Data 0.001 (0.001) Elapsed 4m 41s (remain 6m 39s) Loss: 0.2791(0.1180) EVAL: [1620/3883] Data 0.001 (0.001) Elapsed 4m 44s (remain 6m 36s) Loss: 0.1936(0.1186) EVAL: [1638/3883] Data 0.001 (0.001) Elapsed 4m 47s (remain 6m 33s) Loss: 0.1808(0.1188) EVAL: [1656/3883] Data 0.001 (0.001) Elapsed 4m 50s (remain 6m 30s) Loss: 0.1033(0.1189) EVAL: [1674/3883] Data 0.001 (0.001) Elapsed 4m 53s (remain 6m 27s) Loss: 0.2458(0.1190) EVAL: [1692/3883] Data 0.001 (0.001) Elapsed 4m 56s (remain 6m 24s) Loss: 0.0182(0.1188) EVAL: [1710/3883] Data 0.001 (0.001) Elapsed 5m 0s (remain 6m 20s) Loss: 0.0635(0.1191) EVAL: [1728/3883] Data 0.001 (0.001) Elapsed 5m 3s (remain 6m 17s) Loss: 0.0499(0.1191) EVAL: [1746/3883] Data 0.001 (0.001) Elapsed 5m 6s (remain 6m 14s) Loss: 0.0914(0.1191) EVAL: [1764/3883] Data 0.001 (0.001) Elapsed 5m 9s (remain 6m 11s) Loss: 0.1734(0.1195) EVAL: [1782/3883] Data 0.001 (0.001) Elapsed 5m 12s (remain 6m 8s) Loss: 0.4139(0.1196) EVAL: [1800/3883] Data 0.001 (0.001) Elapsed 5m 15s (remain 6m 5s) Loss: 0.1868(0.1197) EVAL: [1818/3883] Data 0.001 (0.001) Elapsed 5m 19s (remain 6m 1s) Loss: 0.0133(0.1197) EVAL: [1836/3883] Data 0.002 (0.001) Elapsed 5m 22s (remain 5m 58s) Loss: 0.0377(0.1198) EVAL: [1854/3883] Data 0.001 (0.001) Elapsed 5m 25s (remain 5m 55s) Loss: 0.0036(0.1199) EVAL: [1872/3883] Data 0.001 (0.001) Elapsed 5m 28s (remain 5m 52s) Loss: 0.2232(0.1199) EVAL: [1890/3883] Data 0.001 (0.001) Elapsed 5m 31s (remain 5m 49s) Loss: 0.0216(0.1200) EVAL: [1908/3883] Data 0.001 (0.001) Elapsed 5m 34s 
(remain 5m 46s) Loss: 0.0850(0.1199) EVAL: [1926/3883] Data 0.001 (0.001) Elapsed 5m 37s (remain 5m 42s) Loss: 0.0108(0.1198) EVAL: [1944/3883] Data 0.001 (0.001) Elapsed 5m 41s (remain 5m 39s) Loss: 0.0075(0.1200) EVAL: [1962/3883] Data 0.001 (0.001) Elapsed 5m 44s (remain 5m 36s) Loss: 0.2115(0.1203) EVAL: [1980/3883] Data 0.001 (0.001) Elapsed 5m 47s (remain 5m 33s) Loss: 0.2181(0.1204) EVAL: [1998/3883] Data 0.001 (0.001) Elapsed 5m 50s (remain 5m 30s) Loss: 0.1364(0.1204) EVAL: [2016/3883] Data 0.001 (0.001) Elapsed 5m 53s (remain 5m 27s) Loss: 0.0129(0.1207) EVAL: [2034/3883] Data 0.001 (0.001) Elapsed 5m 56s (remain 5m 24s) Loss: 0.0212(0.1205) EVAL: [2052/3883] Data 0.001 (0.001) Elapsed 5m 59s (remain 5m 20s) Loss: 0.2810(0.1204) EVAL: [2070/3883] Data 0.001 (0.001) Elapsed 6m 3s (remain 5m 17s) Loss: 0.2497(0.1206) EVAL: [2088/3883] Data 0.001 (0.001) Elapsed 6m 6s (remain 5m 14s) Loss: 0.0148(0.1203) EVAL: [2106/3883] Data 0.001 (0.001) Elapsed 6m 9s (remain 5m 11s) Loss: 0.1064(0.1203) EVAL: [2124/3883] Data 0.001 (0.001) Elapsed 6m 12s (remain 5m 8s) Loss: 0.0633(0.1199) EVAL: [2142/3883] Data 0.001 (0.001) Elapsed 6m 15s (remain 5m 5s) Loss: 0.0671(0.1199) EVAL: [2160/3883] Data 0.001 (0.001) Elapsed 6m 18s (remain 5m 1s) Loss: 0.1183(0.1196) EVAL: [2178/3883] Data 0.001 (0.001) Elapsed 6m 22s (remain 4m 58s) Loss: 0.0083(0.1193) EVAL: [2196/3883] Data 0.001 (0.001) Elapsed 6m 25s (remain 4m 55s) Loss: 0.0462(0.1192) EVAL: [2214/3883] Data 0.001 (0.001) Elapsed 6m 28s (remain 4m 52s) Loss: 0.0874(0.1192) EVAL: [2232/3883] Data 0.001 (0.001) Elapsed 6m 31s (remain 4m 49s) Loss: 0.1958(0.1193) EVAL: [2250/3883] Data 0.001 (0.001) Elapsed 6m 34s (remain 4m 46s) Loss: 0.5086(0.1194) EVAL: [2268/3883] Data 0.001 (0.001) Elapsed 6m 37s (remain 4m 42s) Loss: 0.0716(0.1196) EVAL: [2286/3883] Data 0.001 (0.001) Elapsed 6m 40s (remain 4m 39s) Loss: 0.2039(0.1197) EVAL: [2304/3883] Data 0.001 (0.001) Elapsed 6m 44s (remain 4m 36s) Loss: 0.1937(0.1195) EVAL: 
[2322/3883] Data 0.001 (0.001) Elapsed 6m 47s (remain 4m 33s) Loss: 0.0081(0.1194) EVAL: [2340/3883] Data 0.001 (0.001) Elapsed 6m 50s (remain 4m 30s) Loss: 0.1219(0.1194) EVAL: [2358/3883] Data 0.001 (0.001) Elapsed 6m 53s (remain 4m 27s) Loss: 0.3142(0.1193) EVAL: [2376/3883] Data 0.001 (0.001) Elapsed 6m 56s (remain 4m 23s) Loss: 0.1997(0.1195) EVAL: [2394/3883] Data 0.001 (0.001) Elapsed 6m 59s (remain 4m 20s) Loss: 0.4206(0.1194) EVAL: [2412/3883] Data 0.001 (0.001) Elapsed 7m 2s (remain 4m 17s) Loss: 0.1616(0.1193) EVAL: [2430/3883] Data 0.001 (0.001) Elapsed 7m 6s (remain 4m 14s) Loss: 0.0279(0.1192) EVAL: [2448/3883] Data 0.001 (0.001) Elapsed 7m 9s (remain 4m 11s) Loss: 0.0847(0.1189) EVAL: [2466/3883] Data 0.001 (0.001) Elapsed 7m 12s (remain 4m 8s) Loss: 0.0852(0.1188) EVAL: [2484/3883] Data 0.001 (0.001) Elapsed 7m 15s (remain 4m 5s) Loss: 0.2764(0.1189) EVAL: [2502/3883] Data 0.001 (0.001) Elapsed 7m 18s (remain 4m 1s) Loss: 0.0444(0.1191) EVAL: [2520/3883] Data 0.001 (0.001) Elapsed 7m 21s (remain 3m 58s) Loss: 0.0269(0.1187) EVAL: [2538/3883] Data 0.001 (0.001) Elapsed 7m 25s (remain 3m 55s) Loss: 0.0688(0.1191) EVAL: [2556/3883] Data 0.001 (0.001) Elapsed 7m 28s (remain 3m 52s) Loss: 0.4862(0.1191) EVAL: [2574/3883] Data 0.001 (0.001) Elapsed 7m 31s (remain 3m 49s) Loss: 0.5755(0.1192) EVAL: [2592/3883] Data 0.001 (0.001) Elapsed 7m 34s (remain 3m 46s) Loss: 0.0426(0.1193) EVAL: [2610/3883] Data 0.003 (0.001) Elapsed 7m 37s (remain 3m 42s) Loss: 0.1562(0.1196) EVAL: [2628/3883] Data 0.001 (0.001) Elapsed 7m 40s (remain 3m 39s) Loss: 0.0435(0.1197) EVAL: [2646/3883] Data 0.001 (0.001) Elapsed 7m 43s (remain 3m 36s) Loss: 0.2066(0.1196) EVAL: [2664/3883] Data 0.001 (0.001) Elapsed 7m 47s (remain 3m 33s) Loss: 0.0542(0.1195) EVAL: [2682/3883] Data 0.001 (0.001) Elapsed 7m 50s (remain 3m 30s) Loss: 0.1454(0.1194) EVAL: [2700/3883] Data 0.001 (0.001) Elapsed 7m 53s (remain 3m 27s) Loss: 0.0557(0.1194) EVAL: [2718/3883] Data 0.001 (0.001) Elapsed 7m 56s 
(remain 3m 24s) Loss: 0.1880(0.1194) EVAL: [2736/3883] Data 0.001 (0.001) Elapsed 7m 59s (remain 3m 20s) Loss: 0.0352(0.1196) EVAL: [2754/3883] Data 0.001 (0.001) Elapsed 8m 2s (remain 3m 17s) Loss: 0.0789(0.1200) EVAL: [2772/3883] Data 0.001 (0.001) Elapsed 8m 5s (remain 3m 14s) Loss: 0.1448(0.1201) EVAL: [2790/3883] Data 0.001 (0.001) Elapsed 8m 9s (remain 3m 11s) Loss: 0.0223(0.1200) EVAL: [2808/3883] Data 0.001 (0.001) Elapsed 8m 12s (remain 3m 8s) Loss: 0.2040(0.1200) EVAL: [2826/3883] Data 0.001 (0.001) Elapsed 8m 15s (remain 3m 5s) Loss: 0.1215(0.1199) EVAL: [2844/3883] Data 0.001 (0.001) Elapsed 8m 18s (remain 3m 1s) Loss: 0.1209(0.1199) EVAL: [2862/3883] Data 0.001 (0.001) Elapsed 8m 21s (remain 2m 58s) Loss: 0.2697(0.1198) EVAL: [2880/3883] Data 0.001 (0.001) Elapsed 8m 24s (remain 2m 55s) Loss: 0.0137(0.1197) EVAL: [2898/3883] Data 0.001 (0.001) Elapsed 8m 28s (remain 2m 52s) Loss: 0.2341(0.1197) EVAL: [2916/3883] Data 0.001 (0.001) Elapsed 8m 31s (remain 2m 49s) Loss: 0.1367(0.1196) EVAL: [2934/3883] Data 0.001 (0.001) Elapsed 8m 34s (remain 2m 46s) Loss: 0.3458(0.1196) EVAL: [2952/3883] Data 0.001 (0.001) Elapsed 8m 37s (remain 2m 42s) Loss: 0.0121(0.1197) EVAL: [2970/3883] Data 0.001 (0.001) Elapsed 8m 40s (remain 2m 39s) Loss: 0.0445(0.1197) EVAL: [2988/3883] Data 0.001 (0.001) Elapsed 8m 43s (remain 2m 36s) Loss: 0.0625(0.1198) EVAL: [3006/3883] Data 0.001 (0.001) Elapsed 8m 46s (remain 2m 33s) Loss: 0.0730(0.1197) EVAL: [3024/3883] Data 0.001 (0.001) Elapsed 8m 50s (remain 2m 30s) Loss: 0.3513(0.1197) EVAL: [3042/3883] Data 0.001 (0.001) Elapsed 8m 53s (remain 2m 27s) Loss: 0.0354(0.1196) EVAL: [3060/3883] Data 0.001 (0.001) Elapsed 8m 56s (remain 2m 24s) Loss: 0.1735(0.1196) EVAL: [3078/3883] Data 0.001 (0.001) Elapsed 8m 59s (remain 2m 20s) Loss: 0.1837(0.1196) EVAL: [3096/3883] Data 0.001 (0.001) Elapsed 9m 2s (remain 2m 17s) Loss: 0.5497(0.1197) EVAL: [3114/3883] Data 0.001 (0.001) Elapsed 9m 5s (remain 2m 14s) Loss: 0.1262(0.1197) EVAL: 
[3132/3883] Data 0.001 (0.001) Elapsed 9m 9s (remain 2m 11s) Loss: 0.0709(0.1196) EVAL: [3150/3883] Data 0.001 (0.001) Elapsed 9m 12s (remain 2m 8s) Loss: 0.0749(0.1195) EVAL: [3168/3883] Data 0.001 (0.001) Elapsed 9m 15s (remain 2m 5s) Loss: 0.2820(0.1194) EVAL: [3186/3883] Data 0.001 (0.001) Elapsed 9m 18s (remain 2m 1s) Loss: 0.0034(0.1191) EVAL: [3204/3883] Data 0.001 (0.001) Elapsed 9m 21s (remain 1m 58s) Loss: 0.0324(0.1191) EVAL: [3222/3883] Data 0.001 (0.001) Elapsed 9m 24s (remain 1m 55s) Loss: 0.2705(0.1191) EVAL: [3240/3883] Data 0.001 (0.001) Elapsed 9m 27s (remain 1m 52s) Loss: 0.4488(0.1191) EVAL: [3258/3883] Data 0.001 (0.001) Elapsed 9m 31s (remain 1m 49s) Loss: 0.0225(0.1191) EVAL: [3276/3883] Data 0.001 (0.001) Elapsed 9m 34s (remain 1m 46s) Loss: 0.0930(0.1190) EVAL: [3294/3883] Data 0.001 (0.001) Elapsed 9m 37s (remain 1m 43s) Loss: 0.1752(0.1191) EVAL: [3312/3883] Data 0.001 (0.001) Elapsed 9m 40s (remain 1m 39s) Loss: 0.0049(0.1192) EVAL: [3330/3883] Data 0.001 (0.001) Elapsed 9m 43s (remain 1m 36s) Loss: 0.2752(0.1190) EVAL: [3348/3883] Data 0.001 (0.001) Elapsed 9m 46s (remain 1m 33s) Loss: 0.0474(0.1191) EVAL: [3366/3883] Data 0.001 (0.001) Elapsed 9m 49s (remain 1m 30s) Loss: 0.0093(0.1190) EVAL: [3384/3883] Data 0.001 (0.001) Elapsed 9m 53s (remain 1m 27s) Loss: 0.1289(0.1191) EVAL: [3402/3883] Data 0.001 (0.001) Elapsed 9m 56s (remain 1m 24s) Loss: 0.1870(0.1192) EVAL: [3420/3883] Data 0.001 (0.001) Elapsed 9m 59s (remain 1m 20s) Loss: 0.1693(0.1191) EVAL: [3438/3883] Data 0.001 (0.001) Elapsed 10m 2s (remain 1m 17s) Loss: 0.1884(0.1193) EVAL: [3456/3883] Data 0.001 (0.001) Elapsed 10m 5s (remain 1m 14s) Loss: 0.2983(0.1195) EVAL: [3474/3883] Data 0.001 (0.001) Elapsed 10m 8s (remain 1m 11s) Loss: 0.0266(0.1195) EVAL: [3492/3883] Data 0.001 (0.001) Elapsed 10m 12s (remain 1m 8s) Loss: 0.1362(0.1195) EVAL: [3510/3883] Data 0.001 (0.001) Elapsed 10m 15s (remain 1m 5s) Loss: 0.0612(0.1195) EVAL: [3528/3883] Data 0.001 (0.001) Elapsed 10m 
18s (remain 1m 2s) Loss: 0.6071(0.1197) EVAL: [3546/3883] Data 0.001 (0.001) Elapsed 10m 21s (remain 0m 58s) Loss: 0.2709(0.1197) EVAL: [3564/3883] Data 0.001 (0.001) Elapsed 10m 24s (remain 0m 55s) Loss: 0.1413(0.1195) EVAL: [3582/3883] Data 0.001 (0.001) Elapsed 10m 27s (remain 0m 52s) Loss: 0.0880(0.1196) EVAL: [3600/3883] Data 0.001 (0.001) Elapsed 10m 30s (remain 0m 49s) Loss: 0.0672(0.1195) EVAL: [3618/3883] Data 0.001 (0.001) Elapsed 10m 34s (remain 0m 46s) Loss: 0.1433(0.1195) EVAL: [3636/3883] Data 0.001 (0.001) Elapsed 10m 37s (remain 0m 43s) Loss: 0.0203(0.1196) EVAL: [3654/3883] Data 0.001 (0.001) Elapsed 10m 40s (remain 0m 39s) Loss: 0.0481(0.1195) EVAL: [3672/3883] Data 0.001 (0.001) Elapsed 10m 43s (remain 0m 36s) Loss: 0.4344(0.1196) EVAL: [3690/3883] Data 0.002 (0.001) Elapsed 10m 46s (remain 0m 33s) Loss: 0.0268(0.1198) EVAL: [3708/3883] Data 0.001 (0.001) Elapsed 10m 49s (remain 0m 30s) Loss: 0.0414(0.1199) EVAL: [3726/3883] Data 0.001 (0.001) Elapsed 10m 52s (remain 0m 27s) Loss: 0.1762(0.1199) EVAL: [3744/3883] Data 0.001 (0.001) Elapsed 10m 56s (remain 0m 24s) Loss: 0.1394(0.1199) EVAL: [3762/3883] Data 0.001 (0.001) Elapsed 10m 59s (remain 0m 21s) Loss: 0.0218(0.1199) EVAL: [3780/3883] Data 0.001 (0.001) Elapsed 11m 2s (remain 0m 17s) Loss: 0.0093(0.1198) EVAL: [3798/3883] Data 0.001 (0.001) Elapsed 11m 5s (remain 0m 14s) Loss: 0.0059(0.1198) EVAL: [3816/3883] Data 0.001 (0.001) Elapsed 11m 8s (remain 0m 11s) Loss: 0.0142(0.1199) EVAL: [3834/3883] Data 0.001 (0.001) Elapsed 11m 11s (remain 0m 8s) Loss: 0.0298(0.1200) EVAL: [3852/3883] Data 0.001 (0.001) Elapsed 11m 15s (remain 0m 5s) Loss: 0.2243(0.1200) EVAL: [3870/3883] Data 0.001 (0.001) Elapsed 11m 18s (remain 0m 2s) Loss: 0.0365(0.1198)
Epoch 10 - avg_train_loss: 0.2623 avg_val_loss: 0.1198 time: 1126s Epoch 10 - AUC: 0.9672965135013212 - pAUC: 0.178370604283474 Epoch 10 - Save Best Score: 0.1784 Model
EVAL: [3882/3883] Data 0.001 (0.001) Elapsed 11m 20s (remain 0m 0s) Loss: 0.2894(0.1198)
========== CV ==========
use Accelerator
Cell In[56], line 1 use Accelerator ^ SyntaxError: invalid syntax
class AverageMeter(object):
    """Keep the most recent value of a metric together with its running mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        # Drop every accumulated statistic back to zero.
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        # `val` is the latest observation, weighted by `n` samples.
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count
def asMinutes(s):
    """Format a duration given in seconds as '<minutes>m <seconds>s'."""
    minutes = math.floor(s / 60)
    seconds = s - minutes * 60
    return '%dm %ds' % (minutes, seconds)
def timeSince(since, percent):
    """Return '<elapsed> (remain <remaining>)' for work started at `since`.

    `percent` is the completed fraction of the work; the total duration is
    extrapolated linearly from the time elapsed so far.
    """
    elapsed = time.time() - since
    remaining = elapsed / percent - elapsed
    return '%s (remain %s)' % (asMinutes(elapsed), asMinutes(remaining))
def train_fn(train_loader, model, criterion, optimizer, epoch, scheduler , accelerator):
    """Run one training epoch and return the sample-weighted mean loss.

    Args:
        train_loader: iterable yielding ``(images, labels, file_name, data)``;
            ``data[1]`` is passed to the model as the metadata input.
        model: module called as ``model(images, meta)``.
        criterion: loss called as ``criterion(y_preds, labels)``.
        optimizer: optimizer stepped inside the accumulation context.
        epoch: zero-based epoch index, used only for log output.
        scheduler: kept for interface compatibility; the learning rate that
            is logged is read from ``optimizer.param_groups``.
        accelerator: ``accelerate.Accelerator`` driving backward, gradient
            clipping, accumulation and device placement.

    Returns:
        ``losses.avg``: the running mean of the batch losses, weighted by
        batch size.
    """
    data_time = AverageMeter()
    losses = AverageMeter()
    # switch to train mode
    model.train()
    start = end = time.time()
    num_batches = len(train_loader)
    # Clipping only happens on gradient-sync steps, so the norm may not exist
    # yet when the first line is logged; NaN keeps the format spec valid.
    grad_norm = float('nan')
    for step, (images, labels , file_name , data) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        # Use the accelerator's device rather than a module-level `device`
        # so every process places tensors on its own GPU.
        images = images.to(accelerator.device)
        labels = labels.to(accelerator.device)
        meta = data[1].to(accelerator.device)
        batch_size = labels.size(0)
        # `accumulate` is what makes the `gradient_accumulation_steps` given to
        # the Accelerator take effect: outside of it every batch is a sync step
        # while `backward` still divides the loss by the accumulation count.
        with accelerator.accumulate(model):
            y_preds = model(images , meta)
            loss = criterion(y_preds, labels)
            # record loss
            losses.update(loss.item(), batch_size)
            # Backward pass with accelerator
            accelerator.backward(loss)
            # Clip before optimizer.step(), and only when gradients are synced.
            if accelerator.sync_gradients:
                clipped = accelerator.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
                if clipped is not None:
                    grad_norm = float(clipped)
            optimizer.step()
            optimizer.zero_grad()
        end = time.time()
        # Reading the LR from the optimizer works for every scheduler type,
        # including ReduceLROnPlateau.
        lr = optimizer.param_groups[0]['lr']
        if step % CFG.print_freq == 0 or step == (num_batches-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Grad: {grad_norm:.4f} '
                  'LR: {lr:.6f} '
                  .format(
                      epoch+1, step, num_batches,
                      data_time=data_time, loss=losses,
                      remain=timeSince(start, float(step+1)/num_batches),
                      grad_norm=grad_norm,
                      lr=lr,
                  ))
        wandb.log({
            "Train Loss": losses.val,
            "Step": step,
            "Gradient Norm": grad_norm,
            "Learning Rate": lr,
        })
    return losses.avg
def valid_fn(valid_loader, model, criterion , accelerator):
    """Evaluate `model` on `valid_loader` and collect predictions.

    Args:
        valid_loader: iterable yielding ``(images, labels, file_name, data)``;
            ``data[1]`` is passed to the model as the metadata input.
        model: module called as ``model(images, meta)``.
        criterion: loss called as ``criterion(y_preds, labels)``.
        accelerator: ``accelerate.Accelerator`` used for device placement and
            for gathering per-process results via ``gather_for_metrics``.

    Returns:
        A 5-tuple ``(avg_loss, predictions, gathered_labels,
        gathered_file_names, gathered_y_preds)``. ``predictions`` concatenates
        the softmax outputs of this process only; the three ``gathered_*``
        arrays concatenate what ``gather_for_metrics`` returned per batch.
    """
    data_time = AverageMeter()
    losses = AverageMeter()
    model.eval()
    num_batches = len(valid_loader)
    preds = []
    gathered_labels = []
    gathered_file_names = []
    gathered_y_preds = []
    start = end = time.time()
    for step, (images, labels , file_name , data) in enumerate(valid_loader):
        data_time.update(time.time() - end)
        batch_size = labels.size(0)
        # Use the accelerator's device rather than a module-level `device`.
        meta = data[1].to(accelerator.device)
        # Nothing in evaluation needs autograd, so loss and softmax are
        # computed under no_grad as well.
        with torch.no_grad():
            y_preds = model(images , meta)
            loss = criterion(y_preds, labels)
            y_preds = torch.nn.functional.softmax(y_preds, dim=1)
        losses.update(loss.item(), batch_size)
        # Gather needed items
        # checkout gather_for_metrics https://huggingface.co/docs/accelerate/quicktour
        gathered_y_preds.append(accelerator.gather_for_metrics(y_preds).cpu().numpy())
        gathered_labels.append(accelerator.gather_for_metrics(labels).cpu().numpy())
        file_name_array = np.array(file_name)
        gathered_file_names.append(np.array(accelerator.gather_for_metrics(file_name_array)))
        preds.append(y_preds.to('cpu').numpy())
        end = time.time()
        if step % CFG.print_freq == 0 or step == (num_batches-1):
            print('EVAL: [{0}/{1}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(
                      step, num_batches,
                      data_time=data_time, loss=losses,
                      remain=timeSince(start, float(step+1)/num_batches),
                  ))
        wandb.log({
            "Val Loss ": losses.val,
            "Val Step": step,
        })
    gathered_y_preds = np.concatenate(gathered_y_preds)
    gathered_labels = np.concatenate(gathered_labels)
    gathered_file_names = np.concatenate(gathered_file_names)
    predictions = np.concatenate(preds)
    return losses.avg, predictions ,gathered_labels , gathered_file_names , gathered_y_preds
def train_loop(folds, fold , model, accelerator): #train + validation
    """Train and validate `model` on one cross-validation fold.

    Rows of `folds` whose ``'fold'`` column equals `fold` form the validation
    split; all other rows form the training split. After every epoch the
    out-of-fold predictions are written to
    ``OOF_pred_fold{fold}_epoch_{epoch}.csv`` and, whenever the score returned
    by ``get_score`` improves (lower is better), the unwrapped model is saved
    to ``OUTPUT_DIR``.

    Args:
        folds: DataFrame with at least ``'fold'``, ``'img_name'`` and
            ``'target'`` columns.
        fold: identifier of the fold held out for validation.
        model: the model to train.
        accelerator: ``accelerate.Accelerator`` that prepares the model,
            optimizer, loaders and scheduler.

    Returns:
        None.
    """
    LOGGER.info(f"========== fold: {fold} training ==========")
    # ====================================================
    # loader
    # ====================================================
    trn_idx = folds[folds['fold'] != fold].index
    val_idx = folds[folds['fold'] == fold].index
    train_folds = folds.loc[trn_idx].reset_index(drop=True)
    valid_folds = folds.loc[val_idx].reset_index(drop=True)
    train_dataset = TrainDataset(train_folds,
                                 transform=get_transforms(data='train'))
    valid_dataset = TrainDataset(valid_folds,
                                 transform=get_transforms(data='valid'))
    # NOTE(review): training batches are drawn in dataframe order
    # (shuffle=False) -- confirm this is intentional.
    train_loader = DataLoader(train_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=False,
                              num_workers=CFG.num_workers,
                              pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=False,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=False)

    # ====================================================
    # scheduler
    # ====================================================
    def get_scheduler(optimizer):
        if CFG.scheduler=='ReduceLROnPlateau':
            scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=CFG.factor, patience=CFG.patience, verbose=True, eps=CFG.eps)
        elif CFG.scheduler=='CosineAnnealingLR':
            scheduler = CosineAnnealingLR(optimizer, T_max=CFG.T_max, eta_min=CFG.min_lr, last_epoch=-1)
        elif CFG.scheduler=='CosineAnnealingWarmRestarts':
            scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=CFG.T_0, T_mult=1, eta_min=CFG.min_lr, last_epoch=-1)
        else:
            # Fail with a clear message instead of an unbound local.
            raise ValueError(f'Unsupported scheduler: {CFG.scheduler}')
        return scheduler

    # ====================================================
    # model & optimizer
    # ====================================================
    optimizer = AdamW(model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay, amsgrad=False)
    scheduler = get_scheduler(optimizer)
    # ====================================================
    # apex
    # ====================================================
    if CFG.apex:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)
    # ====================================================
    # loop
    # ====================================================
    if CFG.weighted_loss:
        criterion = nn.CrossEntropyLoss(weight = class_weights_tensor.to(accelerator.device))
    else:
        criterion = nn.CrossEntropyLoss()
    model, optimizer, train_loader, scheduler, valid_loader = accelerator.prepare(
        model, optimizer, train_loader, scheduler, valid_loader
    )
    # prepare() returns a wrapper around the scheduler, so isinstance checks
    # against the torch scheduler classes must look at the wrapped object.
    # The wrapped scheduler is also stepped directly: the wrapper is designed
    # for per-batch stepping, whereas this schedule advances once per epoch.
    base_scheduler = getattr(scheduler, 'scheduler', scheduler)
    best_score = 50000
    for epoch in range(CFG.epochs):
        torch.cuda.empty_cache()
        start_time = time.time()
        # train
        avg_loss = train_fn(train_loader, model, criterion, optimizer, epoch, scheduler , accelerator)
        # eval
        avg_val_loss, _ , gathered_labels , gathered_file_names , gathered_preds = valid_fn(valid_loader, model, criterion, accelerator)
        if isinstance(base_scheduler, ReduceLROnPlateau):
            base_scheduler.step(avg_val_loss)
        elif isinstance(base_scheduler, (CosineAnnealingLR, CosineAnnealingWarmRestarts)):
            base_scheduler.step()
        # scoring
        score = get_score(gathered_labels, gathered_preds)
        score2 = roc_auc_score(gathered_labels, gathered_preds[:, 1])
        # NOTE(review): logged under the key "pAUC" but the value is the full
        # ROC AUC from roc_auc_score -- confirm the intended metric name.
        wandb.log({"pAUC" : score2})
        elapsed = time.time() - start_time
        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f} avg_val_loss: {avg_val_loss:.4f} time: {elapsed:.0f}s') #.info makes the msg shows in red cadre
        LOGGER.info(f'Epoch {epoch+1} - LogLoss: {score} - AUC: {score2}')
        accelerator.wait_for_everyone()
        # Keep the prepared `model` bound for the next epoch; rebinding it to
        # the unwrapped module would drop the distributed wrapper after the
        # first epoch.
        unwrapped_model = accelerator.unwrap_model(model)
        oof_pred = pd.DataFrame({"True" : gathered_labels , "Predicted" : gathered_preds[:, 1] , "img_name" : gathered_file_names })
        # A single writer avoids several processes racing on the same file.
        if accelerator.is_main_process:
            oof_pred.to_csv(f'OOF_pred_fold{fold}_epoch_{epoch}.csv' , index =False)
        print("Checking files names of the oof ...")
        # array_equal also returns False when the lengths differ, where an
        # element-wise `==` would not give a usable answer.
        names_match = np.array_equal(valid_folds['img_name'].values, oof_pred['img_name'].values)
        targets_match = np.array_equal(valid_folds['target'].values, oof_pred['True'].values)
        if names_match and targets_match:
            LOGGER.info('Successful OOF predictions released!')
        else:
            LOGGER.info('There is a problem with the released OOF predictions.')
        if score < best_score:
            best_score = score
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            accelerator.save(unwrapped_model,OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best.pth')
    return
def init_model(accelerator, pretrained=True):
    """Build a fresh `CustomResNext` for `CFG.model_name`.

    Args:
        accelerator: ``accelerate.Accelerator``; the model is constructed
            inside ``main_process_first()``.
        pretrained: forwarded to ``CustomResNext`` (previously ignored and
            always treated as True).

    Returns:
        The newly constructed model.
    """
    with accelerator.main_process_first():
        print(f"Initializing {CFG.model_name} model")
        model = CustomResNext(CFG.model_name,num_classes = 0 ,n_meta_features=len(meta_features), pretrained = pretrained)
    return model
def _checkpoint_state_dict(state):
    # Normalise a loaded checkpoint (module, {'model': ...} dict, or raw state dict) to a state dict.
    if isinstance(state, torch.nn.Module):
        return state.state_dict()
    if isinstance(state, dict) and 'model' in state:
        return state['model']
    return state


def inference(model, states, test_loader, device=None):
    """Predict on `test_loader`, averaging the outputs of every checkpoint.

    Args:
        model: module called as ``model(images)``; its weights are replaced by
            each entry of `states` in turn.
        states: loaded checkpoints. Each may be a full module, a dict holding
            the weights under ``'model'``, or a plain state dict.
        test_loader: sized iterable yielding image batches.
        device: device to run on; defaults to CUDA when available, else CPU.

    Returns:
        A NumPy array with the per-sample model outputs averaged over `states`.
    """
    # NOTE(review): training calls model(images, meta) while this calls
    # model(images) -- confirm the test-time model and dataset match.
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Place the model once up front; no accelerator object is in scope here
    # and re-preparing the model and loader on every batch is not needed.
    model = model.to(device)
    model.eval()
    probs = []
    for images in tqdm(test_loader, total=len(test_loader)):
        images = images.to(device)
        batch_preds = []
        for state in states:
            model.load_state_dict(_checkpoint_state_dict(state))
            with torch.no_grad():
                y_preds = model(images)
            batch_preds.append(y_preds.to('cpu').numpy())
        probs.append(np.mean(batch_preds, axis=0))
    probs = np.concatenate(probs)
    return probs
def main():
    """
    Prepare: 1.train 2.test 3.submission 4.folds
    """
    gpu_count = torch.cuda.device_count()
    if gpu_count > 1:
        print("Using", gpu_count, "GPUs!")
    else:
        print("Only one GPU available.")

    def get_result(result_df):
        # Score the 'preds' column against the target column and log it.
        labels = result_df[CFG.target_col].values
        preds = result_df['preds'].values
        score = get_score(labels, preds)
        LOGGER.info(f'Score: {score:<.5f}')

    if CFG.train:
        # The accelerator handles fp16 mixed precision, gradient accumulation
        # and multi-GPU data flow.
        accelerator = Accelerator(
            mixed_precision='fp16',  # bf16
            gradient_accumulation_steps=CFG.gradient_accumulation_steps,
        )
        oof_df = pd.DataFrame()
        # Train only the folds selected in CFG.trn_fold, each with a fresh model.
        for fold in range(CFG.n_fold):
            if fold not in CFG.trn_fold:
                continue
            model = init_model(accelerator)
            train_loop(folds, fold, model, accelerator)
        # CV result
        LOGGER.info("========== CV ==========")

    if CFG.inference:
        model = CustomResNext(CFG.model_name, pretrained=False)
        # One checkpoint per trained fold.
        states = []
        for fold in CFG.trn_fold:
            states.append(torch.load(OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best.pth'))
        test_dataset = TestDataset(test, transform=get_transforms(data='valid'))
        test_loader = DataLoader(
            test_dataset,
            batch_size=CFG.batch_size,
            shuffle=False,
            num_workers=CFG.num_workers,
            pin_memory=True,
        )
        predictions = inference(model, states, test_loader)
        # submission
        print(predictions)
        class_probs = torch.nn.functional.softmax(torch.from_numpy(predictions), dim=1)
        test['label'] = class_probs.numpy()[:,1]
        print(test['label'])
        test[['img_name', 'label']].to_csv(OUTPUT_DIR+'submission.csv', index=False)
gathered true: 280, gathered pred 280, true shape 267gathered true: 280, gathered pred 280, true shape 267
# Launch `main` on two processes from inside the notebook.
if __name__ == "__main__":
    notebook_launcher(main, num_processes=2)
# torch.load("/kaggle/working/efficientnet_b4_fold0_best.pth")
Epoch: [1][0/7296] Data 4.836 (4.836) Elapsed 0m 12s (remain 1552m 8s) Loss: 1.2454(1.2454) Grad: 3.4937
Epoch: [1][0/7296] Data 4.755 (4.755) Elapsed 0m 12s (remain 1553m 48s) Loss: 1.5479(1.5479) Grad: 3.4937
Epoch: [1][20/7296] Data 0.020 (0.444) Elapsed 0m 36s (remain 210m 13s) Loss: 0.1604(0.4781) Grad: 0.2797
Epoch: [1][20/7296] Data 0.014 (0.546) Elapsed 0m 36s (remain 210m 18s) Loss: 0.2713(0.5126) Grad: 0.2797
Epoch: [1][40/7296] Data 0.008 (0.538) Elapsed 1m 7s (remain 197m 57s) Loss: 0.3300(0.4262) Grad: 0.6303
Epoch: [1][40/7296] Data 0.015 (0.525) Elapsed 1m 7s (remain 198m 15s) Loss: 0.6318(0.4116) Grad: 0.6303
Epoch: [1][60/7296] Data 0.033 (0.379) Elapsed 1m 35s (remain 188m 44s) Loss: 0.3123(0.3905) Grad: 0.3299
Epoch: [1][60/7296] Data 0.016 (0.523) Elapsed 1m 35s (remain 188m 47s) Loss: 0.3251(0.3737) Grad: 0.3299
Epoch: [1][80/7296] Data 0.039 (0.446) Elapsed 2m 4s (remain 185m 3s) Loss: 0.3523(0.3495) Grad: 0.3777
Epoch: [1][80/7296] Data 0.010 (0.428) Elapsed 2m 4s (remain 185m 8s) Loss: 0.2265(0.3773) Grad: 0.3777
Epoch: [1][0/7296] Data 4.909 (4.909) Elapsed 0m 12s (remain 1578m 30s) Loss: 1.5479(1.5479) Grad: inf
Epoch: [1][0/7296] Data 5.003 (5.003) Elapsed 0m 12s (remain 1580m 6s) Loss: 1.2454(1.2454) Grad: inf
Epoch: [1][20/7296] Data 0.009 (0.518) Elapsed 0m 35s (remain 205m 33s) Loss: 0.1662(0.5732) Grad: 0.5712
Epoch: [1][20/7296] Data 0.018 (0.462) Elapsed 0m 35s (remain 205m 46s) Loss: 0.2877(0.5936) Grad: 0.5712
Epoch: [1][40/7296] Data 0.029 (0.573) Elapsed 1m 5s (remain 193m 58s) Loss: 0.6414(0.4690) Grad: 1.3110
Epoch: [1][40/7296] Data 0.016 (0.480) Elapsed 1m 5s (remain 194m 1s) Loss: 0.3384(0.4711) Grad: 1.3110
Epoch: [1][60/7296] Data 0.029 (0.568) Elapsed 1m 33s (remain 185m 8s) Loss: 0.3308(0.4143) Grad: 0.6530
Epoch: [1][60/7296] Data 0.020 (0.435) Elapsed 1m 33s (remain 185m 11s) Loss: 0.3063(0.4213) Grad: 0.6530
Epoch: [1][80/7296] Data 0.009 (0.367) Elapsed 2m 2s (remain 181m 33s) Loss: 0.2340(0.4010) Grad: 0.7946
Epoch: [1][80/7296] Data 0.023 (0.574) Elapsed 2m 2s (remain 181m 33s) Loss: 0.3610(0.3808) Grad: 0.7946
def print_library_versions():
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA version: {torch.version.cuda}")
print(f"torchvision version: {torchvision.__version__}")
# print(f"accelerate version: {Accelerator.__version__}")
print(f"NumPy version: {np.__version__}")
print(f"Pandas version: {pd.__version__}")